/// A kernel that can be executed.
pub struct Kernel<'a> {
    /// The underlying kernel builder.
    pub builder: ExecuteKernel<'a>,
    /* private fields */
}
Expand description

A kernel that can be executed.

Fields§

§builder: ExecuteKernel<'a>

The underlying kernel builder.

Implementations§

source§

impl<'a> Kernel<'a>

source

pub fn arg<T: KernelArgument>(self, t: &'a T) -> Self

Set a kernel argument.

The arguments must live as long as the kernel. Hence make sure they are not dropped as long as the kernel is in use.

Example where this behaviour is enforced and leads to a compile-time error:

use rust_gpu_tools::opencl::Program;

/// Deliberately does not compile: `buffer` is dropped while `kernel` still
/// holds the reference that was handed to `arg`, so the borrow checker rejects it.
fn would_break(program: &Program) {
   let data = vec![1, 2, 3, 4];
   let buffer = program.create_buffer_from_slice(&data).unwrap();
   let kernel = program.create_kernel("my_kernel", 4, 256).unwrap();
   // `arg` takes `&'a T`, so `buffer` stays borrowed for the kernel's lifetime.
   let kernel = kernel.arg(&buffer);
   // This drop would not be a compile error if the arguments were not bound to the kernel's lifetime.
   drop(buffer);
   kernel.run().unwrap();
}
Examples found in repository?
examples/add.rs (line 45)
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/// Example entry point: runs the `add` kernel on two small `u32` vectors, first through CUDA
/// and then through OpenCL, and asserts that each run yields `[6, 8, 10, 12]`.
///
/// # Panics
///
/// Panics if `Device::all().first()` yields no device, if either `run` call fails, or if a
/// result differs from the expected values.
pub fn main() {
    // Define some data that should be operated on.
    let aa: Vec<u32> = vec![1, 2, 3, 4];
    let bb: Vec<u32> = vec![5, 6, 7, 8];

    // This is the core. Here we write the interaction with the GPU independent of whether it is
    // CUDA or OpenCL.
    let closures = program_closures!(|program, _args| -> Result<Vec<u32>, GPUError> {
        // Make sure the input data has the same length.
        assert_eq!(aa.len(), bb.len());
        let length = aa.len();

        // Copy the data to the GPU.
        let aa_buffer = program.create_buffer_from_slice(&aa)?;
        let bb_buffer = program.create_buffer_from_slice(&bb)?;

        // The result buffer has the same length as the input buffers.
        // NOTE(review): why `create_buffer` is `unsafe` is not visible here; presumably the new
        // buffer's contents are uninitialized. Confirm against its documentation.
        let result_buffer = unsafe { program.create_buffer::<u32>(length)? };

        // Get the kernel.
        let kernel = program.create_kernel("add", 1, 1)?;

        // Execute the kernel.
        // The `length as u32` temporary lives until the end of this statement, so borrowing it
        // is fine only while the whole chain, including `run()`, stays a single expression.
        // The `as` cast would truncate a length above `u32::MAX`; here the length is 4.
        kernel
            .arg(&(length as u32))
            .arg(&aa_buffer)
            .arg(&bb_buffer)
            .arg(&result_buffer)
            .run()?;

        // Get the resulting data.
        let mut result = vec![0u32; length];
        program.read_into_buffer(&result_buffer, &mut result)?;

        Ok(result)
    });

    // Get the first available device.
    let device = *Device::all().first().unwrap();

    // First we run it on CUDA.
    let cuda_program = cuda(device);
    // `()` is presumably what the closure receives as its ignored `_args` parameter.
    let cuda_result = cuda_program.run(closures, ()).unwrap();
    assert_eq!(cuda_result, [6, 8, 10, 12]);
    println!("CUDA result: {:?}", cuda_result);

    // Then we run it on OpenCL.
    let opencl_program = opencl(device);
    let opencl_result = opencl_program.run(closures, ()).unwrap();
    assert_eq!(opencl_result, [6, 8, 10, 12]);
    println!("OpenCL result: {:?}", opencl_result);
}
source

pub fn run(self) -> Result<(), GPUError>

Actually run the kernel.

Examples found in repository?
examples/add.rs (line 49)
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/// Example entry point: runs the `add` kernel on two small `u32` vectors, first through CUDA
/// and then through OpenCL, and asserts that each run yields `[6, 8, 10, 12]`.
///
/// # Panics
///
/// Panics if `Device::all().first()` yields no device, if either `run` call fails, or if a
/// result differs from the expected values.
pub fn main() {
    // Define some data that should be operated on.
    let aa: Vec<u32> = vec![1, 2, 3, 4];
    let bb: Vec<u32> = vec![5, 6, 7, 8];

    // This is the core. Here we write the interaction with the GPU independent of whether it is
    // CUDA or OpenCL.
    let closures = program_closures!(|program, _args| -> Result<Vec<u32>, GPUError> {
        // Make sure the input data has the same length.
        assert_eq!(aa.len(), bb.len());
        let length = aa.len();

        // Copy the data to the GPU.
        let aa_buffer = program.create_buffer_from_slice(&aa)?;
        let bb_buffer = program.create_buffer_from_slice(&bb)?;

        // The result buffer has the same length as the input buffers.
        // NOTE(review): why `create_buffer` is `unsafe` is not visible here; presumably the new
        // buffer's contents are uninitialized. Confirm against its documentation.
        let result_buffer = unsafe { program.create_buffer::<u32>(length)? };

        // Get the kernel.
        let kernel = program.create_kernel("add", 1, 1)?;

        // Execute the kernel.
        // The `length as u32` temporary lives until the end of this statement, so borrowing it
        // is fine only while the whole chain, including `run()`, stays a single expression.
        // The `as` cast would truncate a length above `u32::MAX`; here the length is 4.
        kernel
            .arg(&(length as u32))
            .arg(&aa_buffer)
            .arg(&bb_buffer)
            .arg(&result_buffer)
            .run()?;

        // Get the resulting data.
        let mut result = vec![0u32; length];
        program.read_into_buffer(&result_buffer, &mut result)?;

        Ok(result)
    });

    // Get the first available device.
    let device = *Device::all().first().unwrap();

    // First we run it on CUDA.
    let cuda_program = cuda(device);
    // `()` is presumably what the closure receives as its ignored `_args` parameter.
    let cuda_result = cuda_program.run(closures, ()).unwrap();
    assert_eq!(cuda_result, [6, 8, 10, 12]);
    println!("CUDA result: {:?}", cuda_result);

    // Then we run it on OpenCL.
    let opencl_program = opencl(device);
    let opencl_result = opencl_program.run(closures, ()).unwrap();
    assert_eq!(opencl_result, [6, 8, 10, 12]);
    println!("OpenCL result: {:?}", opencl_result);
}

Trait Implementations§

source§

impl<'a> Debug for Kernel<'a>

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl<'a> RefUnwindSafe for Kernel<'a>

§

impl<'a> !Send for Kernel<'a>

§

impl<'a> !Sync for Kernel<'a>

§

impl<'a> Unpin for Kernel<'a>

§

impl<'a> UnwindSafe for Kernel<'a>

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> Same<T> for T

§

type Output = T

Should always be Self
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.