Struct rust_gpu_tools::Device

source ·

pub struct Device { /* private fields */ }

Expand description

A device that may have a CUDA and/or OpenCL GPU associated with it.

Implementations§

source §

impl Device

source

pub fn vendor(&self) -> Vendor

Returns the Vendor of the GPU.

source

pub fn name(&self) -> String

Returns the name of the GPU, e.g. “GeForce RTX 3090”.

source

pub fn memory(&self) -> u64

Returns the memory of the GPU in bytes.

source

pub fn compute_units(&self) -> u32

Returns the number of compute units of the GPU.

source

pub fn compute_capability(&self) -> Option<(u32, u32)>

Returns the major and minor version of the compute capability (only available on Nvidia GPUs).

source

pub fn unique_id(&self) -> UniqueId

Returns the best possible unique identifier; a UUID is preferred over a PCI ID.

source

pub fn framework(&self) -> Framework

Returns the preferred framework (CUDA or OpenCL) to use.

CUDA will be preferred over OpenCL. The returned framework will work on the device, e.g. it won’t return Framework::Cuda for an AMD device.

source

pub fn cuda_device(&self) -> Option<&Device>

Returns the underlying CUDA device if it is available.

Examples found in repository ?

examples/add.rs (line 8)

/// Builds the CUDA variant of the example program for `device`.
///
/// The precompiled kernel in `./add.fatbin` is embedded at compile time.
///
/// # Panics
///
/// Panics if `device` has no underlying CUDA device, or if the program cannot
/// be created from the embedded bytes.
fn cuda(device: &Device) -> Program {
    // The kernel was compiled with:
    // nvcc -fatbin -gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_75,code=compute_75 --x cu add.cl
    let fatbin = include_bytes!("./add.fatbin");
    let gpu = device.cuda_device().unwrap();
    Program::Cuda(cuda::Program::from_bytes(gpu, fatbin).unwrap())
}

source

pub fn opencl_device(&self) -> Option<&Device>

Returns the underlying OpenCL device if it is available.

Examples found in repository ?

examples/add.rs (line 16)

/// Builds the OpenCL variant of the example program for `device`.
///
/// The kernel source in `./add.cl` is embedded at compile time as a string.
///
/// # Panics
///
/// Panics if `device` has no underlying OpenCL device, or if the program
/// cannot be created from the embedded source.
fn opencl(device: &Device) -> Program {
    let kernel_source = include_str!("./add.cl");
    let gpu = device.opencl_device().unwrap();
    Program::Opencl(opencl::Program::from_opencl(gpu, kernel_source).unwrap())
}

source

pub fn all() -> Vec<&'static Device>

Returns all available GPUs that are supported.

Examples found in repository ?

examples/add.rs (line 59)

/// Adds two small `u32` vectors on the first available GPU, once through CUDA and once
/// through OpenCL, and checks that both runs produce `[6, 8, 10, 12]`.
///
/// # Panics
///
/// Panics if no supported device is found, if the first device lacks either a CUDA or an
/// OpenCL device (the `cuda`/`opencl` helpers unwrap), or if either run returns an error.
pub fn main() {
    // Define some data that should be operated on.
    let aa: Vec<u32> = vec![1, 2, 3, 4];
    let bb: Vec<u32> = vec![5, 6, 7, 8];

    // This is the core. Here we write the interaction with the GPU independent of whether it is
    // CUDA or OpenCL.
    // The same `closures` value is handed to both the CUDA and the OpenCL program below.
    let closures = program_closures!(|program, _args| -> Result<Vec<u32>, GPUError> {
        // Make sure the input data has the same length.
        assert_eq!(aa.len(), bb.len());
        let length = aa.len();

        // Copy the data to the GPU.
        let aa_buffer = program.create_buffer_from_slice(&aa)?;
        let bb_buffer = program.create_buffer_from_slice(&bb)?;

        // The result buffer has the same length as the input buffers.
        // NOTE(review): `create_buffer` needs `unsafe`, presumably because the buffer contents
        // are uninitialized until the kernel writes them — confirm against its documentation.
        let result_buffer = unsafe { program.create_buffer::<u32>(length)? };

        // Get the kernel.
        // NOTE(review): the two `1` arguments are presumably the global and local work sizes —
        // confirm against the `create_kernel` signature.
        let kernel = program.create_kernel("add", 1, 1)?;

        // Execute the kernel.
        // The length is passed as `u32`; the cast would truncate above `u32::MAX`, which the
        // four-element inputs here cannot reach.
        kernel
            .arg(&(length as u32))
            .arg(&aa_buffer)
            .arg(&bb_buffer)
            .arg(&result_buffer)
            .run()?;

        // Get the resulting data.
        let mut result = vec![0u32; length];
        program.read_into_buffer(&result_buffer, &mut result)?;

        Ok(result)
    });

    // Get the first available device.
    // `first()` yields a reference to the `&Device` stored in the vector, hence the dereference.
    let device = *Device::all().first().unwrap();

    // First we run it on CUDA.
    let cuda_program = cuda(device);
    let cuda_result = cuda_program.run(closures, ()).unwrap();
    assert_eq!(cuda_result, [6, 8, 10, 12]);
    println!("CUDA result: {:?}", cuda_result);

    // Then we run it on OpenCL.
    let opencl_program = opencl(device);
    let opencl_result = opencl_program.run(closures, ()).unwrap();
    assert_eq!(opencl_result, [6, 8, 10, 12]);
    println!("OpenCL result: {:?}", opencl_result);
}