vyre-driver-cuda 0.6.1

CUDA/PTX backend for vyre through the CUDA driver API.
Documentation
use smallvec::SmallVec;
use vyre_driver::BackendError;

use crate::backend::dispatch::CudaBackend;
use crate::backend::output_range::CudaOutputReadback;
use crate::backend::resident::{CudaResidentBuffer, ResidentViewCache};
use crate::backend::resident_dispatch::helpers::borrow_resident_sequence_output_slots;
use crate::backend::resident_dispatch_support::CudaResidentDispatchStep;
use crate::backend::staging_reserve::{reserve_smallvec, reserved_vec};

impl CudaBackend {
    pub(crate) fn dispatch_resident_sequence(
        &self,
        steps: &[CudaResidentDispatchStep<'_>],
    ) -> Result<(), BackendError> {
        self.dispatch_resident_sequence_read_many(steps, &[])
            .map(|_| ())
    }

    pub(crate) fn dispatch_resident_sequence_read_many(
        &self,
        steps: &[CudaResidentDispatchStep<'_>],
        read_handles: &[CudaResidentBuffer],
    ) -> Result<Vec<Vec<u8>>, BackendError> {
        self.upload_resident_many_sequence_read_many(&[], steps, read_handles)
    }

    pub(crate) fn upload_resident_many_sequence_read_many(
        &self,
        uploads: &[(CudaResidentBuffer, &[u8])],
        steps: &[CudaResidentDispatchStep<'_>],
        read_handles: &[CudaResidentBuffer],
    ) -> Result<Vec<Vec<u8>>, BackendError> {
        let mut outputs = reserved_vec(read_handles.len(), "resident sequence read outputs")?;
        self.upload_resident_many_sequence_read_many_into(
            uploads,
            steps,
            read_handles,
            &mut outputs,
        )?;
        Ok(outputs)
    }

    pub(crate) fn upload_resident_many_sequence_read_many_into(
        &self,
        uploads: &[(CudaResidentBuffer, &[u8])],
        steps: &[CudaResidentDispatchStep<'_>],
        read_handles: &[CudaResidentBuffer],
        outputs: &mut Vec<Vec<u8>>,
    ) -> Result<(), BackendError> {
        let mut readbacks = SmallVec::<[CudaOutputReadback; 8]>::new();
        self.prepare_full_resident_readbacks(read_handles, &mut readbacks)?;
        self.upload_resident_many_sequence_read_ranges_into(
            uploads,
            steps,
            read_handles,
            &readbacks,
            outputs,
        )
    }

    pub(crate) fn clear_upload_resident_many_sequence_read_many_into(
        &self,
        clears: &[CudaResidentBuffer],
        uploads: &[(CudaResidentBuffer, &[u8])],
        steps: &[CudaResidentDispatchStep<'_>],
        read_handles: &[CudaResidentBuffer],
        outputs: &mut Vec<Vec<u8>>,
    ) -> Result<(), BackendError> {
        let mut fills = SmallVec::<[(CudaResidentBuffer, u8); 8]>::new();
        reserve_smallvec(&mut fills, clears.len(), "resident sequence clear fills")?;
        fills.extend(clears.iter().copied().map(|handle| (handle, 0)));
        self.fill_upload_resident_many_sequence_read_many_into(
            &fills,
            uploads,
            steps,
            read_handles,
            outputs,
        )
    }

    pub(crate) fn fill_upload_resident_many_sequence_read_many_into(
        &self,
        fills: &[(CudaResidentBuffer, u8)],
        uploads: &[(CudaResidentBuffer, &[u8])],
        steps: &[CudaResidentDispatchStep<'_>],
        read_handles: &[CudaResidentBuffer],
        outputs: &mut Vec<Vec<u8>>,
    ) -> Result<(), BackendError> {
        let mut readbacks = SmallVec::<[CudaOutputReadback; 8]>::new();
        self.prepare_full_resident_readbacks(read_handles, &mut readbacks)?;
        let mut borrowed_outputs =
            borrow_resident_sequence_output_slots(outputs, read_handles.len())?;
        self.fill_upload_resident_many_sequence_read_ranges_borrowed_into(
            fills,
            uploads,
            steps,
            read_handles,
            &readbacks,
            borrowed_outputs.as_mut_slice(),
        )
    }

    fn prepare_full_resident_readbacks(
        &self,
        read_handles: &[CudaResidentBuffer],
        readbacks: &mut SmallVec<[CudaOutputReadback; 8]>,
    ) -> Result<(), BackendError> {
        readbacks.clear();
        reserve_smallvec(
            readbacks,
            read_handles.len(),
            "resident sequence full readbacks",
        )?;
        let mut resident_view_cache = ResidentViewCache::new();
        reserve_smallvec(
            &mut resident_view_cache,
            read_handles.len(),
            "resident sequence full-readback view cache",
        )?;
        for &handle in read_handles {
            let buffer = self.resident_store.view_cached(
                handle,
                &mut resident_view_cache,
                "resident sequence full-readback view cache",
            )?;
            readbacks.push(CudaOutputReadback {
                device_offset: 0,
                byte_len: buffer.byte_len,
            });
        }
        Ok(())
    }

    pub(crate) fn upload_resident_many_sequence_read_ranges_into(
        &self,
        uploads: &[(CudaResidentBuffer, &[u8])],
        steps: &[CudaResidentDispatchStep<'_>],
        read_handles: &[CudaResidentBuffer],
        readbacks: &[CudaOutputReadback],
        outputs: &mut Vec<Vec<u8>>,
    ) -> Result<(), BackendError> {
        let mut borrowed_outputs =
            borrow_resident_sequence_output_slots(outputs, read_handles.len())?;
        self.upload_resident_many_sequence_read_ranges_borrowed_into(
            uploads,
            steps,
            read_handles,
            readbacks,
            borrowed_outputs.as_mut_slice(),
        )
    }

    pub(crate) fn upload_resident_many_sequence_read_ranges_borrowed_into(
        &self,
        uploads: &[(CudaResidentBuffer, &[u8])],
        steps: &[CudaResidentDispatchStep<'_>],
        read_handles: &[CudaResidentBuffer],
        readbacks: &[CudaOutputReadback],
        outputs: &mut [&mut Vec<u8>],
    ) -> Result<(), BackendError> {
        self.fill_upload_resident_many_repeated_sequence_read_ranges_borrowed_into(
            &[],
            uploads,
            steps,
            &[],
            0,
            read_handles,
            readbacks,
            outputs,
        )
    }

    pub(crate) fn fill_upload_resident_many_sequence_read_ranges_borrowed_into(
        &self,
        fills: &[(CudaResidentBuffer, u8)],
        uploads: &[(CudaResidentBuffer, &[u8])],
        steps: &[CudaResidentDispatchStep<'_>],
        read_handles: &[CudaResidentBuffer],
        readbacks: &[CudaOutputReadback],
        outputs: &mut [&mut Vec<u8>],
    ) -> Result<(), BackendError> {
        self.fill_upload_resident_many_repeated_sequence_read_ranges_borrowed_into(
            fills,
            uploads,
            steps,
            &[],
            0,
            read_handles,
            readbacks,
            outputs,
        )
    }

    pub(crate) fn upload_resident_many_repeated_sequence_read_ranges_borrowed_into(
        &self,
        uploads: &[(CudaResidentBuffer, &[u8])],
        prefix_steps: &[CudaResidentDispatchStep<'_>],
        repeated_steps: &[CudaResidentDispatchStep<'_>],
        repeat_count: usize,
        read_handles: &[CudaResidentBuffer],
        readbacks: &[CudaOutputReadback],
        outputs: &mut [&mut Vec<u8>],
    ) -> Result<(), BackendError> {
        self.fill_upload_resident_many_repeated_sequence_read_ranges_borrowed_into(
            &[],
            uploads,
            prefix_steps,
            repeated_steps,
            repeat_count,
            read_handles,
            readbacks,
            outputs,
        )
    }
}