Struct GpuDispatchPolicy

Source

pub struct GpuDispatchPolicy {
    pub xtwx_n_min: usize,
    pub xtwx_flops_min: usize,
    pub xtwx_use_fused_below_p: usize,
    pub gemm_min_flops: usize,
    pub potrf_min_p: usize,
    pub small_dense_batched_potrf_max_p: usize,
    pub small_dense_batched_potrf_min_batch: usize,
    pub syevd_min_p: usize,
    pub sparse_min_nnz: usize,
    pub fused_kernel_min_n: usize,
    pub keep_design_resident_min_bytes: usize,
    pub prefer_gpu_factorization_min_p: usize,
    pub row_kernel_min_n: usize,
    pub mixed_precision: GpuMixedPrecisionPolicy,
}

Fields§

§xtwx_n_min: usize
§xtwx_flops_min: usize
§xtwx_use_fused_below_p: usize
§gemm_min_flops: usize
§potrf_min_p: usize
§small_dense_batched_potrf_max_p: usize
§small_dense_batched_potrf_min_batch: usize
§syevd_min_p: usize
§sparse_min_nnz: usize
§fused_kernel_min_n: usize
§keep_design_resident_min_bytes: usize
§prefer_gpu_factorization_min_p: usize
§row_kernel_min_n: usize
§mixed_precision: GpuMixedPrecisionPolicy

Struct GpuDispatchPolicy

Fields§

Implementations§

impl GpuDispatchPolicy

pub const REFINEMENT_MIN_P: usize = 64

pub const REFINEMENT_MAX_STEPS: usize = 3

pub const REFINEMENT_TOL: f64 = 1e-12

pub const MATVEC_OFFLOAD_FLOPS_MIN: u128 = 10_000_000

pub const MATVEC_OFFLOAD_MIN_CG_ITERS: usize = 8

pub const fn iterative_refinement_should_attempt(&self, p: usize) -> bool

pub const fn dense_gemv_target_is_gpu( &self, n: usize, p: usize, resident: bool, ) -> bool

pub const fn xtwx_target_is_gpu( &self, n: usize, p: usize, materialized: bool, ) -> bool

pub const fn xtwy_target_is_gpu( &self, n: usize, px: usize, q: usize, materialized: bool, ) -> bool

pub const fn potrf_target_is_gpu(&self, p: usize, h_resident: bool) -> bool

pub const fn dense_hessian_work_target_is_gpu(&self, n: usize, p: usize) -> bool

pub const fn reduced_schur_matvec_should_offload( &self, n: usize, k: usize, d: usize, cg_iters: usize, ) -> bool

§Live arrow-Schur call site

impl GpuDispatchPolicy

pub const DEVICE_LOOP_MIN_P: usize = 32

pub const fn should_use_gpu_pirls_loop(&self, adm: PirlsLoopAdmission) -> bool

pub const fn should_run_reml_outer_on_device( &self, adm: RemlOuterAdmission, ) -> bool

Trait Implementations§

impl Clone for GpuDispatchPolicy

fn clone(&self) -> GpuDispatchPolicy

fn clone_from(&mut self, source: &Self)

impl Debug for GpuDispatchPolicy

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for GpuDispatchPolicy

fn default() -> Self

impl<'de> Deserialize<'de> for GpuDispatchPolicy

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl Eq for GpuDispatchPolicy

impl PartialEq for GpuDispatchPolicy

fn eq(&self, other: &GpuDispatchPolicy) -> bool

fn ne(&self, other: &Rhs) -> bool

impl Serialize for GpuDispatchPolicy

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

impl StructuralPartialEq for GpuDispatchPolicy

Auto Trait Implementations§

impl Freeze for GpuDispatchPolicy

impl RefUnwindSafe for GpuDispatchPolicy

impl Send for GpuDispatchPolicy

impl Sync for GpuDispatchPolicy

impl Unpin for GpuDispatchPolicy

impl UnsafeUnpin for GpuDispatchPolicy

impl UnwindSafe for GpuDispatchPolicy

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> ByRef<T> for T

fn by_ref(&self) -> &T

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT where ST: ?Sized, DT: ?Sized,

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

impl<T> DistributionExt for T where T: ?Sized,

fn rand<T>(&self, rng: &mut (impl Rng + ?Sized)) -> T where Self: Distribution<T>,

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Imply<T> for U where T: ?Sized, U: ?Sized,

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> Read<Exclusive, BecauseExclusive> for T where T: ?Sized,

impl<T> Same for T

type Output = T

Struct GpuDispatchPolicy

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

impl<T> DistributionExt for T
where T: ?Sized,

fn rand<T>(&self, rng: &mut (impl Rng + ?Sized)) -> T
where Self: Distribution<T>,

impl<T, U> Imply<T> for U
where T: ?Sized, U: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,