// rustacuda/context.rs

1//! # CUDA context management
2//!
3//! Most CUDA functions require a context. A CUDA context is analogous to a CPU process - it's
4//! an isolated container for all runtime state, including configuration settings and the
5//! device/unified/page-locked memory allocations. Each context has a separate memory space, and
6//! pointers from one context do not work in another. Each context is associated with a single
7//! device. Although it is possible to have multiple contexts associated with a single device, this
8//! is strongly discouraged as it can cause a significant loss of performance.
9//!
10//! CUDA keeps a thread-local stack of contexts which the programmer can push to or pop from.
11//! The top context in that stack is known as the "current" context and it is used in most CUDA
12//! API calls. One context can be safely made current in multiple CPU threads.
13//!
14//! # Safety
15//!
16//! The CUDA context management API does not fit easily into Rust's safety guarantees.
17//!
18//! The thread-local stack (as well as the fact that any context can be on the stack for any number
19//! of threads) means there is no clear owner for a CUDA context, but it still has to be cleaned up.
20//! Also, the fact that a context can be current to multiple threads at once means that there can be
21//! multiple implicit references to a context which are not controlled by Rust.
22//!
23//! RustaCUDA handles ownership by providing an owning [`Context`](struct.Context.html) struct and
24//! a non-owning [`UnownedContext`](struct.UnownedContext.html). When the `Context` is dropped, the
25//! backing context is destroyed. The context could be current on other threads, though. In this
26//! case, the context is still destroyed, and attempts to access the context on other threads will
27//! fail with an error. This is (mostly) safe, if a bit inconvenient. It's only mostly safe because
28//! other threads could be accessing that context while the destructor is running on this thread,
29//! which could result in undefined behavior.
30//!
31//! In short, Rust's thread-safety guarantees cannot fully protect use of the context management
32//! functions. The programmer must ensure that no other OS threads are using the `Context` when it
33//! is dropped.
34//!
35//! # Examples
36//!
//! For most common uses (one device, one OS thread) it should suffice to create a single context:
38//!
39//! ```
40//! use rustacuda::device::Device;
41//! use rustacuda::context::{Context, ContextFlags};
42//! # use std::error::Error;
43//! # fn main () -> Result<(), Box<dyn Error>> {
44//!
45//! rustacuda::init(rustacuda::CudaFlags::empty())?;
46//! let device = Device::get_device(0)?;
47//! let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
48//! // call RustaCUDA functions which use the context
49//!
//! // The context will be destroyed when it is dropped or falls out of scope.
51//! drop(context);
52//! # Ok(())
53//! # }
54//! ```
55//!
56//! If you have multiple OS threads that each submit work to the same device, you can get a handle
57//! to the single context and pass it to each thread.
58//!
59//! ```
60//! # use rustacuda::context::{Context, ContextFlags, CurrentContext};
61//! # use rustacuda::device::Device;
62//! # use std::error::Error;
63//! # fn main() -> Result<(), Box<dyn Error>> {
64//! # rustacuda::init(rustacuda::CudaFlags::empty())?;
65//! # let device = Device::get_device(0)?;
66//! // As before
67//! let context =
68//!     Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
69//! let mut join_handles = vec![];
70//!
71//! for _ in 0..4 {
72//!     let unowned = context.get_unowned();
73//!     let join_handle = std::thread::spawn(move || {
74//!         CurrentContext::set_current(&unowned).unwrap();
75//!         // Call RustaCUDA functions which use the context
76//!     });
77//!     join_handles.push(join_handle);
78//! }
79//! // We must ensure that the other threads are not using the context when it's destroyed.
80//! for handle in join_handles {
81//!     handle.join().unwrap();
82//! }
83//! // Now it's safe to drop the context.
84//! drop(context);
85//! # Ok(())
86//! # }
87//! ```
88//!
89//! If you have multiple devices, each device needs its own context.
90//!
91//! ```
92//! # use rustacuda::device::Device;
93//! # use rustacuda::context::{Context, ContextStack, ContextFlags, CurrentContext};
94//! # use std::error::Error;
95//! #
96//! # fn main() -> Result<(), Box<dyn Error>> {
97//! # rustacuda::init(rustacuda::CudaFlags::empty())?;
98//! // Create and pop contexts for each device
99//! let mut contexts = vec![];
100//! for device in Device::devices()? {
101//!     let device = device?;
102//!     let ctx =
103//!         Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
104//!     ContextStack::pop()?;
105//!     contexts.push(ctx);
106//! }
107//! CurrentContext::set_current(&contexts[0])?;
108//!
109//! // Call RustaCUDA functions which will use the context
110//!
111//! # Ok(())
112//! # }
113//! ```
114
115use crate::device::Device;
116use crate::error::{CudaResult, DropResult, ToResult};
117use crate::private::Sealed;
118use crate::CudaApiVersion;
119use cuda_driver_sys::CUcontext;
120use std::mem;
121use std::mem::transmute;
122use std::ptr;
123
/// This enumeration represents configuration settings for devices which share hardware resources
/// between L1 cache and shared memory.
///
/// Note that this is only a preference - the driver will use the requested configuration if
/// possible, but it is free to choose a different configuration if required to execute functions.
///
/// See
/// [CurrentContext::get_cache_config](struct.CurrentContext.html#method.get_cache_config) and
/// [CurrentContext::set_cache_config](struct.CurrentContext.html#method.set_cache_config) to get
/// and set the cache config for the current context.
// repr(u32): values of this enum are passed to/from the driver as CUfunc_cache (see
// CurrentContext::get_cache_config / set_cache_config), so the discriminants must not be
// reordered or renumbered.
#[repr(u32)]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub enum CacheConfig {
    /// No preference for shared memory or L1 (default)
    PreferNone = 0,
    /// Prefer larger shared memory and smaller L1 cache
    PreferShared = 1,
    /// Prefer larger L1 cache and smaller shared memory
    PreferL1 = 2,
    /// Prefer equal-sized L1 cache and shared memory
    PreferEqual = 3,

    // Hidden variant reserving the right to add variants without a semver break
    // (this crate pre-dates widespread use of `#[non_exhaustive]`). Never pass it to the driver.
    #[doc(hidden)]
    __Nonexhaustive,
}
149
/// This enumeration represents the limited resources which can be accessed through
/// [CurrentContext::get_resource_limit](struct.CurrentContext.html#method.get_resource_limit) and
/// [CurrentContext::set_resource_limit](struct.CurrentContext.html#method.set_resource_limit).
// repr(u32): values of this enum are transmuted to the driver's CUlimit type in
// get_resource_limit / set_resource_limit, so the discriminants must not be changed.
#[repr(u32)]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub enum ResourceLimit {
    /// The size in bytes of each GPU thread stack
    StackSize = 0,
    /// The size in bytes of the FIFO used by the `printf()` device system call.
    PrintfFifoSize = 1,
    /// The size in bytes of the heap used by the `malloc()` and `free()` device system calls.
    ///
    /// Note that this is used for memory allocated within a kernel launch; it is not related to the
    /// device memory allocated by the host.
    MallocHeapSize = 2,
    /// The maximum nesting depth of a grid at which a thread can safely call
    /// `cudaDeviceSynchronize()` to wait on child grid launches to complete.
    DeviceRuntimeSynchronizeDepth = 3,
    /// The maximum number of outstanding device runtime launches that can be made from the current
    /// context.
    DeviceRuntimePendingLaunchCount = 4,
    /// L2 cache fetch granularity
    MaxL2FetchGranularity = 5,

    // Hidden variant reserving the right to add variants without a semver break.
    // Never pass it to the driver.
    #[doc(hidden)]
    __Nonexhaustive,
}
177
/// This enumeration represents the options for configuring the shared memory bank size.
///
/// See
/// [CurrentContext::get_shared_memory_config](struct.CurrentContext.html#method.get_shared_memory_config) and
/// [CurrentContext::set_shared_memory_config](struct.CurrentContext.html#method.set_shared_memory_config) to get
/// and set the shared memory config for the current context.
// repr(u32): values of this enum cross the FFI boundary as the driver's CUsharedconfig type
// (see get_shared_memory_config / set_shared_memory_config); do not renumber.
#[repr(u32)]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub enum SharedMemoryConfig {
    /// Set shared-memory bank size to the default.
    DefaultBankSize = 0,
    /// Set shared-memory bank width to four bytes
    FourByteBankSize = 1,
    /// Set shared-memory bank width to eight bytes
    EightByteBankSize = 2,

    // Hidden variant reserving the right to add variants without a semver break.
    // Never pass it to the driver.
    #[doc(hidden)]
    __Nonexhaustive,
}
197
bitflags! {
    /// Bit flags for initializing the CUDA context.
    ///
    /// If you're not sure which flags to use, `MAP_HOST | SCHED_AUTO` is a good default.
    pub struct ContextFlags: u32 {
        /// Instructs CUDA to actively spin when waiting for results from the GPU. This can decrease
        /// latency when waiting for the GPU, but may lower the performance of other CPU threads
        /// if they are performing work in parallel with the CUDA thread.
        const SCHED_SPIN = 0x01;

        /// Instructs CUDA to yield its thread when waiting for results from the GPU. This can
        /// increase latency when waiting for the GPU, but can increase the performance of CPU
        /// threads performing work in parallel with the GPU.
        const SCHED_YIELD = 0x02;

        /// Instructs CUDA to block the CPU thread on a synchronization primitive when waiting for
        /// the GPU to finish work.
        const SCHED_BLOCKING_SYNC = 0x04;

        /// Instructs CUDA to automatically choose whether to yield to other OS threads while waiting
        /// for the GPU, or to spin the OS thread. This is the default.
        // NOTE: this is the zero flag, so `SCHED_AUTO` is what you get when no other SCHED_*
        // flag is set; it cannot be tested for with `contains`.
        const SCHED_AUTO = 0x00;

        /// Instructs CUDA to support mapped pinned allocations. This flag must be set in order to
        /// use page-locked memory (see [LockedBuffer](../memory/struct.LockedBuffer.html)).
        const MAP_HOST = 0x08;

        /// Instruct CUDA not to reduce local memory after resizing local memory for a kernel. This
        /// can prevent thrashing by local memory allocations when launching many kernels with high
        /// local memory usage at the cost of potentially increased memory usage.
        const LMEM_RESIZE_TO_MAX = 0x10;
    }
}
231
/// Owned handle to a CUDA context.
///
/// The context will be destroyed when this goes out of scope. If this is the current context on
/// the current OS thread, the next context on the stack (if any) will be made current. Note that
/// the context will be destroyed even if other threads are still using it. Attempts to access the
/// destroyed context from another thread will return an error.
#[derive(Debug)]
pub struct Context {
    // Raw driver handle. Set to null once the context has been destroyed so that `Drop` and
    // `Context::drop` do not destroy it twice.
    inner: CUcontext,
}
impl Context {
    /// Create a CUDA context for the given device.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn create_and_push(flags: ContextFlags, device: Device) -> CudaResult<Context> {
        unsafe {
            // cuCtxCreate both creates the context and makes it current on this thread (pushes it
            // onto the thread-local stack), hence the name `create_and_push`. The context is NOT
            // popped before returning.
            // SAFETY: `ctx` is a valid out-pointer for the driver to write the new handle into.
            let mut ctx: CUcontext = ptr::null_mut();
            cuda_driver_sys::cuCtxCreate_v2(
                &mut ctx as *mut CUcontext,
                flags.bits(),
                device.into_inner(),
            )
            .to_result()?;
            Ok(Context { inner: ctx })
        }
    }

    /// Get the API version used to create this context.
    ///
    /// This is not necessarily the latest version supported by the driver.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let version = context.get_api_version()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_api_version(&self) -> CudaResult<CudaApiVersion> {
        unsafe {
            // SAFETY: `self.inner` is a live context handle owned by this struct, and
            // `api_version` is a valid out-pointer.
            let mut api_version = 0u32;
            cuda_driver_sys::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32)
                .to_result()?;
            Ok(CudaApiVersion {
                version: api_version as i32,
            })
        }
    }

    /// Returns a non-owning handle to this context.
    ///
    /// This is useful for sharing a single context between threads (though see the module-level
    /// documentation for safety details!).
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let unowned = context.get_unowned();
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_unowned(&self) -> UnownedContext {
        UnownedContext { inner: self.inner }
    }

    /// Destroy a `Context`, returning an error.
    ///
    /// Destroying a context can return errors from previous asynchronous work. This function
    /// destroys the given context and returns the error and the un-destroyed context on failure.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// match Context::drop(context) {
    ///     Ok(()) => println!("Successfully destroyed"),
    ///     Err((e, ctx)) => {
    ///         println!("Failed to destroy context: {:?}", e);
    ///         // Do something with ctx
    ///     },
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn drop(mut ctx: Context) -> DropResult<Context> {
        // Already destroyed (e.g. by a previous failed drop) - nothing to do.
        if ctx.inner.is_null() {
            return Ok(());
        }

        unsafe {
            // Null out the handle first so the `Drop` impl cannot destroy it a second time.
            let inner = mem::replace(&mut ctx.inner, ptr::null_mut());
            match cuda_driver_sys::cuCtxDestroy_v2(inner).to_result() {
                Ok(()) => {
                    // Skip the Drop impl entirely; the context is already destroyed.
                    mem::forget(ctx);
                    Ok(())
                }
                // On failure, hand the still-live context back to the caller.
                Err(e) => Err((e, Context { inner })),
            }
        }
    }
}
impl Drop for Context {
    fn drop(&mut self) {
        // Already destroyed via `Context::drop` - nothing to do.
        if self.inner.is_null() {
            return;
        }

        unsafe {
            // Null out the handle before the FFI call so a panic cannot lead to a double-destroy.
            let inner = mem::replace(&mut self.inner, ptr::null_mut());
            // No choice but to panic here: Drop cannot return an error, and silently leaking a
            // context would hide asynchronous CUDA errors. Use `Context::drop` to handle failures.
            cuda_driver_sys::cuCtxDestroy_v2(inner)
                .to_result()
                .expect("Failed to destroy context");
        }
    }
}
387
/// Sealed trait for `Context` and `UnownedContext`. Not intended for use outside of RustaCUDA.
// Sealing (via the private `Sealed` supertrait) lets functions like `ContextStack::push` and
// `CurrentContext::set_current` accept either handle type without allowing downstream impls.
pub trait ContextHandle: Sealed {
    #[doc(hidden)]
    fn get_inner(&self) -> CUcontext;
}
impl Sealed for Context {}
impl ContextHandle for Context {
    fn get_inner(&self) -> CUcontext {
        self.inner
    }
}
impl Sealed for UnownedContext {}
impl ContextHandle for UnownedContext {
    fn get_inner(&self) -> CUcontext {
        self.inner
    }
}
405
/// Non-owning handle to a CUDA context.
#[derive(Debug, Clone)]
pub struct UnownedContext {
    // Raw driver handle; this struct does not destroy it on drop.
    inner: CUcontext,
}
// SAFETY: the handle is just an opaque pointer that is only ever dereferenced by the CUDA driver,
// and the module docs state that one context can safely be made current on multiple CPU threads.
// Accessing a context after its owning `Context` was destroyed yields a driver error, not UB
// (see module-level docs) — though concurrent destruction remains the caller's responsibility.
unsafe impl Send for UnownedContext {}
unsafe impl Sync for UnownedContext {}
impl UnownedContext {
    /// Get the API version used to create this context.
    ///
    /// This is not necessarily the latest version supported by the driver.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let unowned = context.get_unowned();
    /// let version = unowned.get_api_version()?;
    /// #
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_api_version(&self) -> CudaResult<CudaApiVersion> {
        unsafe {
            // SAFETY: `api_version` is a valid out-pointer. If the backing context has been
            // destroyed, the driver reports an error through `to_result` (see module docs).
            let mut api_version = 0u32;
            cuda_driver_sys::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32)
                .to_result()?;
            Ok(CudaApiVersion {
                version: api_version as i32,
            })
        }
    }
}
446
/// Type used to represent the thread-local context stack.
#[derive(Debug)]
pub struct ContextStack;
impl ContextStack {
    /// Pop the current context off the stack and return the handle. That context may then be made
    /// current again (perhaps on a different CPU thread) by calling [push](#method.push).
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags, ContextStack};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// # let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let unowned = ContextStack::pop()?;
    /// #
    /// # Ok(())
    /// # }
    /// ```
    pub fn pop() -> CudaResult<UnownedContext> {
        unsafe {
            // SAFETY: `ctx` is a valid out-pointer for the popped handle. Popping does not
            // transfer ownership - the returned handle is deliberately unowned.
            let mut ctx: CUcontext = ptr::null_mut();
            cuda_driver_sys::cuCtxPopCurrent_v2(&mut ctx as *mut CUcontext).to_result()?;
            Ok(UnownedContext { inner: ctx })
        }
    }

    /// Push the given context to the top of the stack
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{Context, ContextFlags, ContextStack};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// # let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let unowned = ContextStack::pop()?;
    /// ContextStack::push(&unowned)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn push<C: ContextHandle>(ctx: &C) -> CudaResult<()> {
        unsafe {
            // Accepts either `Context` or `UnownedContext` via the sealed ContextHandle trait.
            cuda_driver_sys::cuCtxPushCurrent_v2(ctx.get_inner()).to_result()?;
            Ok(())
        }
    }
}
503
/// Struct representing a range of stream priorities.
///
/// By convention, lower numbers imply greater priorities. The range of meaningful stream priorities
/// is given by `[greatest, least]` - that is (numerically), `greatest <= least`.
// Copy is derived (in addition to Clone) because this is a trivially copyable pair of i32s;
// callers can pass it by value without explicit clones.
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
pub struct StreamPriorityRange {
    /// The least stream priority
    pub least: i32,
    /// The greatest stream priority
    pub greatest: i32,
}
515
/// Type representing the top context in the thread-local stack.
#[derive(Debug)]
pub struct CurrentContext;
impl CurrentContext {
    /// Returns the preferred cache configuration for the current context.
    ///
    /// On devices where the L1 cache and shared memory use the same hardware resources, this
    /// function returns the preferred cache configuration for the current context. For devices
    /// where the size of the L1 cache and shared memory are fixed, this will always return
    /// `CacheConfig::PreferNone`.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let cache_config = CurrentContext::get_cache_config()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_cache_config() -> CudaResult<CacheConfig> {
        unsafe {
            // The pointer cast relies on CacheConfig being #[repr(u32)] with discriminants
            // assumed to match the driver's CUfunc_cache values — TODO confirm against sys crate.
            let mut config = CacheConfig::PreferNone;
            cuda_driver_sys::cuCtxGetCacheConfig(
                &mut config as *mut CacheConfig as *mut cuda_driver_sys::CUfunc_cache,
            )
            .to_result()?;
            Ok(config)
        }
    }

    /// Return the device ID for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let device = CurrentContext::get_device()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_device() -> CudaResult<Device> {
        unsafe {
            // Construct a placeholder Device (crate-internal field access) and let the driver
            // fill in the real device ordinal.
            let mut device = Device { device: 0 };
            cuda_driver_sys::cuCtxGetDevice(&mut device.device as *mut cuda_driver_sys::CUdevice)
                .to_result()?;
            Ok(device)
        }
    }

    /// Return the context flags for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let flags = CurrentContext::get_flags()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_flags() -> CudaResult<ContextFlags> {
        unsafe {
            let mut flags = 0u32;
            cuda_driver_sys::cuCtxGetFlags(&mut flags as *mut u32).to_result()?;
            // from_bits_truncate: silently drop any driver-reported bits this crate doesn't model.
            Ok(ContextFlags::from_bits_truncate(flags))
        }
    }

    /// Return resource limits for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, ResourceLimit };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let stack_size = CurrentContext::get_resource_limit(ResourceLimit::StackSize)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_resource_limit(resource: ResourceLimit) -> CudaResult<usize> {
        unsafe {
            let mut limit: usize = 0;
            // transmute relies on ResourceLimit being #[repr(u32)] with discriminants assumed to
            // match the driver's CUlimit values — TODO confirm against sys crate.
            cuda_driver_sys::cuCtxGetLimit(&mut limit as *mut usize, transmute(resource))
                .to_result()?;
            Ok(limit)
        }
    }

    /// Return the shared memory configuration for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, ResourceLimit };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let shared_mem_config = CurrentContext::get_shared_memory_config()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_shared_memory_config() -> CudaResult<SharedMemoryConfig> {
        unsafe {
            // The pointer cast relies on SharedMemoryConfig being #[repr(u32)] with discriminants
            // assumed to match the driver's CUsharedconfig values — TODO confirm against sys crate.
            let mut cfg = SharedMemoryConfig::DefaultBankSize;
            cuda_driver_sys::cuCtxGetSharedMemConfig(
                &mut cfg as *mut SharedMemoryConfig as *mut cuda_driver_sys::CUsharedconfig,
            )
            .to_result()?;
            Ok(cfg)
        }
    }

    /// Return the least and greatest stream priorities.
    ///
    /// If the program attempts to create a stream with a priority outside of this range, it will be
    /// automatically clamped to within the valid range. If the device does not support stream
    /// priorities, the returned range will contain zeroes.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let priority_range = CurrentContext::get_stream_priority_range()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_stream_priority_range() -> CudaResult<StreamPriorityRange> {
        unsafe {
            let mut range = StreamPriorityRange {
                least: 0,
                greatest: 0,
            };
            cuda_driver_sys::cuCtxGetStreamPriorityRange(
                &mut range.least as *mut i32,
                &mut range.greatest as *mut i32,
            )
            .to_result()?;
            Ok(range)
        }
    }

    /// Sets the preferred cache configuration for the current context.
    ///
    /// On devices where L1 cache and shared memory use the same hardware resources, this sets the
    /// preferred cache configuration for the current context. This is only a preference. The
    /// driver will use the requested configuration if possible, but is free to choose a different
    /// configuration if required to execute the function.
    ///
    /// This setting does nothing on devices where the size of the L1 cache and shared memory are
    /// fixed.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, CacheConfig };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_cache_config(CacheConfig::PreferL1)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_cache_config(cfg: CacheConfig) -> CudaResult<()> {
        // transmute relies on CacheConfig being #[repr(u32)]; the hidden __Nonexhaustive variant
        // must never be passed here.
        unsafe { cuda_driver_sys::cuCtxSetCacheConfig(transmute(cfg)).to_result() }
    }

    /// Sets a requested resource limit for the current context.
    ///
    /// Note that this is only a request; the driver is free to modify the requested value to meet
    /// hardware requirements. Each limit has some specific restrictions.
    ///
    /// * `StackSize`: Controls the stack size in bytes for each GPU thread
    /// * `PrintfFifoSize`: Controls the size in bytes of the FIFO used by the `printf()` device
    ///   system call. This cannot be changed after a kernel has been launched which uses the
    ///   `printf()` function.
    /// * `MallocHeapSize`: Controls the size in bytes of the heap used by the `malloc()` and `free()`
    ///   device system calls. This cannot be changed after a kernel has been launched which uses the
    ///   `malloc()` and `free()` system calls.
    /// * `DeviceRuntimeSynchronizeDepth`: Controls the maximum nesting depth of a grid at which a
    ///   thread can safely call `cudaDeviceSynchronize()`. This cannot be changed after a kernel has
    ///   been launched which uses the device runtime. When setting this limit, keep in mind that
    ///   additional levels of sync depth require the driver to reserve large amounts of device
    ///   memory which can no longer be used for device allocations.
    /// * `DeviceRuntimePendingLaunchCount`: Controls the maximum number of outstanding device
    ///    runtime launches that can be made from the current context. A grid is outstanding from
    ///    the point of the launch up until the grid is known to have completed. Keep in mind that
    ///    increasing this limit will require the driver to reserve larger amounts of device memory
    ///    which can no longer be used for device allocations.
    /// * `MaxL2FetchGranularity`: Controls the L2 fetch granularity. This is purely a performance
    ///    hint and it can be ignored or clamped depending on the platform.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, ResourceLimit };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_resource_limit(ResourceLimit::StackSize, 2048)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_resource_limit(resource: ResourceLimit, limit: usize) -> CudaResult<()> {
        unsafe {
            // transmute relies on ResourceLimit being #[repr(u32)]; the hidden __Nonexhaustive
            // variant must never be passed here.
            cuda_driver_sys::cuCtxSetLimit(transmute(resource), limit).to_result()?;
            Ok(())
        }
    }

    /// Sets the preferred shared memory configuration for the current context.
    ///
    /// On devices with configurable shared memory banks, this function will set the context's
    /// shared memory bank size which is used for subsequent kernel launches.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext, SharedMemoryConfig };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_shared_memory_config(SharedMemoryConfig::DefaultBankSize)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_shared_memory_config(cfg: SharedMemoryConfig) -> CudaResult<()> {
        // transmute relies on SharedMemoryConfig being #[repr(u32)]; the hidden __Nonexhaustive
        // variant must never be passed here.
        unsafe { cuda_driver_sys::cuCtxSetSharedMemConfig(transmute(cfg)).to_result() }
    }

    /// Returns a non-owning handle to the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let unowned = CurrentContext::get_current()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_current() -> CudaResult<UnownedContext> {
        unsafe {
            // Unlike pop(), this does not remove the context from the stack.
            let mut ctx: CUcontext = ptr::null_mut();
            cuda_driver_sys::cuCtxGetCurrent(&mut ctx as *mut CUcontext).to_result()?;
            Ok(UnownedContext { inner: ctx })
        }
    }

    /// Set the given context as the current context for this thread.
    ///
    /// If there is no context set for this thread, this pushes the given context onto the stack.
    /// If there is a context set for this thread, this replaces the top context on the stack with
    /// the given context.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::device::Device;
    /// # use rustacuda::context::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # rustacuda::init(rustacuda::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_current(&context)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_current<C: ContextHandle>(c: &C) -> CudaResult<()> {
        unsafe {
            // Accepts either `Context` or `UnownedContext` via the sealed ContextHandle trait.
            cuda_driver_sys::cuCtxSetCurrent(c.get_inner()).to_result()?;
            Ok(())
        }
    }

    /// Block to wait for a context's tasks to complete.
    pub fn synchronize() -> CudaResult<()> {
        unsafe {
            // Blocks the calling CPU thread until all work queued in the current context is done.
            cuda_driver_sys::cuCtxSynchronize().to_result()?;
            Ok(())
        }
    }
}