pub use super::kernels::Blake3KernelId as KernelId;
use crate::platform::Caps;
#[cfg(target_arch = "aarch64")]
use crate::platform::caps::aarch64;
#[cfg(target_arch = "powerpc64")]
use crate::platform::caps::power;
#[cfg(target_arch = "riscv64")]
use crate::platform::caps::riscv;
#[cfg(target_arch = "s390x")]
use crate::platform::caps::s390x;
#[cfg(target_arch = "x86_64")]
use crate::platform::caps::x86;
/// Default `DispatchTable::boundaries` value, used by the default-kind and portable tables.
///
/// NOTE(review): presumably input lengths in bytes that separate the `xs`/`s`/`m`/`l`
/// classes — confirm against the dispatcher that consumes `boundaries`.
pub const DEFAULT_BOUNDARIES: [usize; 3] = [64, 256, 4096];
/// `bulk_sizeclass_threshold` used by the x86_64 AVX-512 profiles (4 KiB).
#[cfg(target_arch = "x86_64")]
const THRESHOLD_AVX512: usize = 4 * 1024;

/// `bulk_sizeclass_threshold` used by the x86_64 default-kind profile (8 KiB).
#[cfg(target_arch = "x86_64")]
const THRESHOLD_AVX2: usize = 8 * 1024;

/// `bulk_sizeclass_threshold` used by the aarch64 NEON profile (16 KiB).
#[cfg(target_arch = "aarch64")]
const THRESHOLD_NEON: usize = 16 * 1024;

/// `bulk_sizeclass_threshold` used by every non-x86_64 profile that streams through the
/// portable kernel (32 KiB).
#[cfg(not(target_arch = "x86_64"))]
const THRESHOLD_PORTABLE: usize = 32 * 1024;
// Default parallel cost-model parameters. `default_parallel_costs` feeds all seven into
// `parallel_cost_model`, in this order.
// NOTE(review): the names suggest costs/budgets expressed in bytes; the exact scheduling
// formula lives in the consumer of `ParallelTable` — confirm there.
const DEFAULT_PAR_SPAWN_COST_BYTES: usize = 24 * 1024;
const DEFAULT_PAR_MERGE_COST_BYTES: usize = 16 * 1024;
const DEFAULT_PAR_BYTES_PER_CORE_SMALL: usize = 256 * 1024;
const DEFAULT_PAR_BYTES_PER_CORE_MEDIUM: usize = 128 * 1024;
const DEFAULT_PAR_BYTES_PER_CORE_LARGE: usize = 64 * 1024;
const DEFAULT_PAR_SMALL_LIMIT_BYTES: usize = 256 * 1024;
const DEFAULT_PAR_MEDIUM_LIMIT_BYTES: usize = 2 * 1024 * 1024;
/// Tuning parameters for the parallel hashing path of one profile.
///
/// The struct is plain `Copy` data. The first three fields are chosen per profile; the
/// remaining seven mirror the private `ParallelCostModel`.
///
/// NOTE(review): field meanings below are inferred from the names; the code that
/// interprets them is not visible here — confirm against the scheduler.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(not(feature = "parallel"), allow(dead_code))]
pub struct ParallelTable {
    /// Presumably the smallest input size (bytes) for which parallelism is considered.
    pub min_bytes: usize,
    /// Presumably the smallest chunk count for which parallelism is considered.
    pub min_chunks: usize,
    /// Thread cap. Several profiles use `0`; presumably "no explicit cap" — TODO confirm.
    pub max_threads: u8,
    /// Cost-model term: spawn overhead, expressed in bytes.
    pub spawn_cost_bytes: usize,
    /// Cost-model term: merge overhead, expressed in bytes.
    pub merge_cost_bytes: usize,
    /// Cost-model term: bytes per core for the "small" input range.
    pub bytes_per_core_small: usize,
    /// Cost-model term: bytes per core for the "medium" input range.
    pub bytes_per_core_medium: usize,
    /// Cost-model term: bytes per core for the "large" input range.
    pub bytes_per_core_large: usize,
    /// Upper limit (bytes) of the "small" input range.
    pub small_limit_bytes: usize,
    /// Upper limit (bytes) of the "medium" input range.
    pub medium_limit_bytes: usize,
}
/// Kernel selection for the streaming path of one profile.
///
/// NOTE(review): presumably `stream` handles incremental updates and `bulk` takes over
/// once an update reaches `bulk_sizeclass_threshold` bytes — confirm against the
/// streaming hasher that reads this table.
#[derive(Debug, Clone, Copy)]
pub struct StreamingTable {
    /// Kernel used for ordinary streaming updates.
    pub stream: KernelId,
    /// Kernel used for bulk-sized updates.
    pub bulk: KernelId,
    /// Size threshold tied to the `bulk` kernel.
    pub bulk_sizeclass_threshold: usize,
}
/// One-shot dispatch table: three boundaries plus one kernel per size class.
///
/// NOTE(review): presumably inputs are classified as `xs`/`s`/`m`/`l` by comparing their
/// length against `boundaries` in order — confirm the comparison direction and
/// inclusivity in the dispatcher.
#[derive(Debug, Clone, Copy)]
pub struct DispatchTable {
    /// The three cut points between the four size classes.
    pub boundaries: [usize; 3],
    /// Kernel for the extra-small class.
    pub xs: KernelId,
    /// Kernel for the small class.
    pub s: KernelId,
    /// Kernel for the medium class.
    pub m: KernelId,
    /// Kernel for the large class.
    pub l: KernelId,
}
/// Complete tuning profile for one CPU family.
///
/// `select_profile_for_caps` returns a `&'static` reference to one of these; the
/// `select_*_for_caps` accessors then borrow the individual tables.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(not(feature = "parallel"), allow(dead_code))]
pub struct FamilyProfile {
    /// One-shot kernel dispatch by size class.
    pub dispatch: DispatchTable,
    /// Kernel choice for the streaming path.
    pub streaming: StreamingTable,
    /// Parallel tuning returned by `select_parallel_table_for_caps`.
    pub parallel: ParallelTable,
    /// Parallel tuning returned by `select_streaming_parallel_table_for_caps`.
    pub streaming_parallel: ParallelTable,
}
/// SIMD kernel used by the default-kind tables on the current target architecture.
///
/// Exactly one definition is compiled on x86_64, s390x, powerpc64 and riscv64; on any
/// other architecture the constant does not exist (nothing there refers to it).
///
/// Each definition used to carry an extra outer `cfg(any(..))` that its own
/// `target_arch` guard already implied, and there was a `KernelId::Portable` fallback
/// guarded by both `any(..)` and `not(any(..))`, which could never be compiled. Both were
/// removed; the set of compiled items is unchanged on every target.
#[cfg(target_arch = "x86_64")]
const SIMD_KERNEL: KernelId = KernelId::X86Avx2;
#[cfg(target_arch = "s390x")]
const SIMD_KERNEL: KernelId = KernelId::S390xVector;
#[cfg(target_arch = "powerpc64")]
const SIMD_KERNEL: KernelId = KernelId::PowerVsx;
#[cfg(target_arch = "riscv64")]
const SIMD_KERNEL: KernelId = KernelId::RiscvV;
// Architecture-local aliases for the vector kernels referenced by the s390x and
// powerpc64 profile tables.
#[cfg(target_arch = "s390x")]
const S390X_VECTOR_KERNEL: KernelId = KernelId::S390xVector;
#[cfg(target_arch = "powerpc64")]
const POWER_VSX_KERNEL: KernelId = KernelId::PowerVsx;
/// Kernel placed in the `xs` slot of `default_kind_table`: SSE4.1 on x86_64, portable on
/// riscv64, s390x and powerpc64.
#[cfg(target_arch = "x86_64")]
const DEFAULT_XS: KernelId = KernelId::X86Sse41;
#[cfg(any(
    target_arch = "riscv64",
    target_arch = "s390x",
    target_arch = "powerpc64"
))]
const DEFAULT_XS: KernelId = KernelId::Portable;
/// Kernel placed in the `s` slot of `default_kind_table`: SSE4.1 on x86_64, portable on
/// riscv64, s390x and powerpc64.
#[cfg(target_arch = "x86_64")]
const DEFAULT_S: KernelId = KernelId::X86Sse41;
#[cfg(any(
    target_arch = "riscv64",
    target_arch = "s390x",
    target_arch = "powerpc64"
))]
const DEFAULT_S: KernelId = KernelId::Portable;
/// Kernel placed in the `m` slot of `default_kind_table`: the target's `SIMD_KERNEL`.
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "riscv64",
    target_arch = "s390x",
    target_arch = "powerpc64"
))]
const DEFAULT_M: KernelId = SIMD_KERNEL;

/// Kernel placed in the `l` slot of `default_kind_table`: the target's `SIMD_KERNEL`.
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "riscv64",
    target_arch = "s390x",
    target_arch = "powerpc64"
))]
const DEFAULT_L: KernelId = SIMD_KERNEL;
/// `stream` kernel of the default-kind streaming table: AVX2 on x86_64, portable on
/// riscv64. Not defined on any other architecture.
#[cfg(target_arch = "x86_64")]
const DEFAULT_STREAM_KERNEL: KernelId = KernelId::X86Avx2;
#[cfg(target_arch = "riscv64")]
const DEFAULT_STREAM_KERNEL: KernelId = KernelId::Portable;

/// `bulk` kernel of the default-kind streaming table: the target's `SIMD_KERNEL`.
#[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
const DEFAULT_BULK_KERNEL: KernelId = SIMD_KERNEL;
/// The seven cost-model parameters of a `ParallelTable`, bundled so that
/// `parallel_table` takes them as a single argument.
#[derive(Debug, Clone, Copy)]
struct ParallelCostModel {
    /// Copied to `ParallelTable::spawn_cost_bytes`.
    spawn_cost_bytes: usize,
    /// Copied to `ParallelTable::merge_cost_bytes`.
    merge_cost_bytes: usize,
    /// Copied to `ParallelTable::bytes_per_core_small`.
    bytes_per_core_small: usize,
    /// Copied to `ParallelTable::bytes_per_core_medium`.
    bytes_per_core_medium: usize,
    /// Copied to `ParallelTable::bytes_per_core_large`.
    bytes_per_core_large: usize,
    /// Copied to `ParallelTable::small_limit_bytes`.
    small_limit_bytes: usize,
    /// Copied to `ParallelTable::medium_limit_bytes`.
    medium_limit_bytes: usize,
}
/// Bundles the seven cost-model parameters into a `ParallelCostModel`.
///
/// Each argument is stored in the field of the same name; no validation or scaling is
/// applied. Usable in `const`/`static` initializers.
#[must_use]
#[inline]
const fn parallel_cost_model(
    spawn_cost_bytes: usize,
    merge_cost_bytes: usize,
    bytes_per_core_small: usize,
    bytes_per_core_medium: usize,
    bytes_per_core_large: usize,
    small_limit_bytes: usize,
    medium_limit_bytes: usize,
) -> ParallelCostModel {
    ParallelCostModel {
        spawn_cost_bytes,
        merge_cost_bytes,
        bytes_per_core_small,
        bytes_per_core_medium,
        bytes_per_core_large,
        small_limit_bytes,
        medium_limit_bytes,
    }
}
#[inline]
#[must_use]
const fn parallel_table(
min_bytes: usize,
min_chunks: usize,
max_threads: u8,
cost: ParallelCostModel,
) -> ParallelTable {
ParallelTable {
min_bytes,
min_chunks,
max_threads,
spawn_cost_bytes: cost.spawn_cost_bytes,
merge_cost_bytes: cost.merge_cost_bytes,
bytes_per_core_small: cost.bytes_per_core_small,
bytes_per_core_medium: cost.bytes_per_core_medium,
bytes_per_core_large: cost.bytes_per_core_large,
small_limit_bytes: cost.small_limit_bytes,
medium_limit_bytes: cost.medium_limit_bytes,
}
}
// Expands to `parallel_table(min_bytes, min_chunks, max_threads, parallel_cost_model(..))`.
//
// Takes ten positional expressions (trailing comma optional): the three activation
// parameters followed by the seven cost-model parameters, in the same order as
// `parallel_cost_model`'s arguments. Only compiled on s390x and powerpc64.
#[cfg(any(target_arch = "s390x", target_arch = "powerpc64"))]
macro_rules! parallel_costs {
    (
        $min_bytes:expr,
        $min_chunks:expr,
        $max_threads:expr,
        $spawn:expr,
        $merge:expr,
        $per_core_small:expr,
        $per_core_medium:expr,
        $per_core_large:expr,
        $small_limit:expr,
        $medium_limit:expr $(,)?
    ) => {
        parallel_table(
            $min_bytes,
            $min_chunks,
            $max_threads,
            parallel_cost_model(
                $spawn,
                $merge,
                $per_core_small,
                $per_core_medium,
                $per_core_large,
                $small_limit,
                $medium_limit,
            ),
        )
    };
}
/// Builds a `ParallelTable` that pairs the given activation parameters with the
/// `DEFAULT_PAR_*` cost model.
#[must_use]
#[inline]
const fn default_parallel_costs(min_bytes: usize, min_chunks: usize, max_threads: u8) -> ParallelTable {
    let default_cost = parallel_cost_model(
        DEFAULT_PAR_SPAWN_COST_BYTES,
        DEFAULT_PAR_MERGE_COST_BYTES,
        DEFAULT_PAR_BYTES_PER_CORE_SMALL,
        DEFAULT_PAR_BYTES_PER_CORE_MEDIUM,
        DEFAULT_PAR_BYTES_PER_CORE_LARGE,
        DEFAULT_PAR_SMALL_LIMIT_BYTES,
        DEFAULT_PAR_MEDIUM_LIMIT_BYTES,
    );
    parallel_table(min_bytes, min_chunks, max_threads, default_cost)
}
/// Builds a `ParallelTable` from one of five hard-coded cost models, chosen by
/// `generation`.
///
/// `min_bytes`, `min_chunks` and `max_threads` are forwarded unchanged. `generation`
/// values `0..=3` each pick their own cost model; every other value picks the last one.
/// Each `parallel_costs!` call lists the cost-model values in the order spawn, merge,
/// per-core small/medium/large, small limit, medium limit. Only compiled on s390x and
/// powerpc64.
#[cfg(any(target_arch = "s390x", target_arch = "powerpc64"))]
#[must_use]
#[inline]
const fn scalar_profile_parallel(
    min_bytes: usize,
    min_chunks: usize,
    max_threads: u8,
    generation: u8,
) -> ParallelTable {
    const KIB: usize = 1024;
    const MIB: usize = 1024 * KIB;

    match generation {
        0 => parallel_costs!(
            min_bytes,
            min_chunks,
            max_threads,
            64 * KIB,
            48 * KIB,
            384 * KIB,
            256 * KIB,
            192 * KIB,
            512 * KIB,
            4 * MIB,
        ),
        1 => parallel_costs!(
            min_bytes,
            min_chunks,
            max_threads,
            56 * KIB,
            40 * KIB,
            320 * KIB,
            224 * KIB,
            160 * KIB,
            384 * KIB,
            3 * MIB,
        ),
        2 => parallel_costs!(
            min_bytes,
            min_chunks,
            max_threads,
            48 * KIB,
            32 * KIB,
            256 * KIB,
            192 * KIB,
            128 * KIB,
            320 * KIB,
            3 * MIB,
        ),
        3 => parallel_costs!(
            min_bytes,
            min_chunks,
            max_threads,
            40 * KIB,
            28 * KIB,
            256 * KIB,
            160 * KIB,
            96 * KIB,
            256 * KIB,
            2 * MIB,
        ),
        _ => parallel_costs!(
            min_bytes,
            min_chunks,
            max_threads,
            32 * KIB,
            24 * KIB,
            224 * KIB,
            128 * KIB,
            80 * KIB,
            256 * KIB,
            2 * MIB,
        ),
    }
}
/// Builds the default-kind dispatch table: `DEFAULT_BOUNDARIES` plus the
/// `DEFAULT_XS`/`DEFAULT_S`/`DEFAULT_M`/`DEFAULT_L` kernels of the current target.
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "riscv64",
    target_arch = "s390x",
    target_arch = "powerpc64"
))]
#[must_use]
#[inline]
const fn default_kind_table() -> DispatchTable {
    let (xs, s, m, l) = (DEFAULT_XS, DEFAULT_S, DEFAULT_M, DEFAULT_L);
    DispatchTable {
        boundaries: DEFAULT_BOUNDARIES,
        xs,
        s,
        m,
        l,
    }
}
/// `bulk_sizeclass_threshold` of the default-kind streaming table: `THRESHOLD_AVX2` on
/// x86_64, `THRESHOLD_PORTABLE` on riscv64. Not defined on any other architecture.
#[cfg(target_arch = "x86_64")]
const DEFAULT_BULK_THRESHOLD: usize = THRESHOLD_AVX2;
#[cfg(target_arch = "riscv64")]
const DEFAULT_BULK_THRESHOLD: usize = THRESHOLD_PORTABLE;
/// Builds the default-kind streaming table from `DEFAULT_STREAM_KERNEL`,
/// `DEFAULT_BULK_KERNEL` and `DEFAULT_BULK_THRESHOLD`. Only compiled on x86_64 and
/// riscv64.
#[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
#[must_use]
#[inline]
const fn default_kind_streaming_table() -> StreamingTable {
    let stream = DEFAULT_STREAM_KERNEL;
    let bulk = DEFAULT_BULK_KERNEL;
    StreamingTable {
        stream,
        bulk,
        bulk_sizeclass_threshold: DEFAULT_BULK_THRESHOLD,
    }
}
/// Builds the default-kind parallel table: the default cost model with a 128 KiB
/// `min_bytes`, 64 `min_chunks` and `max_threads` of 0.
///
/// NOTE(review): `max_threads == 0` presumably means "no explicit cap" — confirm against
/// the consumer of `ParallelTable`.
#[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
#[must_use]
#[inline]
const fn default_kind_parallel_table() -> ParallelTable {
    const MIN_BYTES: usize = 128 * 1024;
    const MIN_CHUNKS: usize = 64;
    const MAX_THREADS: u8 = 0;
    default_parallel_costs(MIN_BYTES, MIN_CHUNKS, MAX_THREADS)
}
/// Builds the default-kind streaming-parallel table, which is the same table that
/// `default_kind_parallel_table` returns.
#[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
#[must_use]
#[inline]
const fn default_kind_streaming_parallel_table() -> ParallelTable {
    default_kind_parallel_table()
}
/// Assembles the default-kind `FamilyProfile` from the four `default_kind_*` table
/// builders. Only compiled on x86_64 and riscv64.
#[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
#[must_use]
#[inline]
const fn default_kind_profile() -> FamilyProfile {
    let dispatch = default_kind_table();
    let streaming = default_kind_streaming_table();
    let parallel = default_kind_parallel_table();
    let streaming_parallel = default_kind_streaming_parallel_table();
    FamilyProfile {
        dispatch,
        streaming,
        parallel,
        streaming_parallel,
    }
}
/// Assembles the all-portable `FamilyProfile`: every dispatch and streaming slot uses
/// `KernelId::Portable`, the bulk threshold is `THRESHOLD_PORTABLE`, and both parallel
/// tables are the default cost model with (128 KiB, 64 chunks, `max_threads` 0).
/// Compiled on every architecture except x86_64.
#[cfg(not(target_arch = "x86_64"))]
#[must_use]
#[inline]
const fn portable_profile() -> FamilyProfile {
    let portable = KernelId::Portable;
    // `ParallelTable` is `Copy`, so one value serves both parallel slots.
    let parallel = default_parallel_costs(128 * 1024, 64, 0);

    FamilyProfile {
        dispatch: DispatchTable {
            boundaries: DEFAULT_BOUNDARIES,
            xs: portable,
            s: portable,
            m: portable,
            l: portable,
        },
        streaming: StreamingTable {
            stream: portable,
            bulk: portable,
            bulk_sizeclass_threshold: THRESHOLD_PORTABLE,
        },
        parallel,
        streaming_parallel: parallel,
    }
}
/// Default-kind profile (x86_64 and riscv64 only), built by `default_kind_profile`.
#[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
pub static PROFILE_DEFAULT_KIND: FamilyProfile = default_kind_profile();
/// All-portable profile (every architecture except x86_64), built by `portable_profile`.
#[cfg(not(target_arch = "x86_64"))]
pub static PROFILE_PORTABLE: FamilyProfile = portable_profile();
/// Profile selected on x86_64 when `AVX512_READY` is present together with at least one
/// AMX capability.
///
/// All dispatch classes use the AVX-512 kernel; streaming uses AVX2 with AVX-512 as the
/// bulk kernel. The `parallel` numbers are literal values (their origin is not visible
/// here — treat them as tuned data and do not round them).
#[cfg(target_arch = "x86_64")]
pub static PROFILE_X86_AVX512_AMX: FamilyProfile = FamilyProfile {
    dispatch: DispatchTable {
        boundaries: [64, 1024, 4096],
        xs: KernelId::X86Avx512,
        s: KernelId::X86Avx512,
        m: KernelId::X86Avx512,
        l: KernelId::X86Avx512,
    },
    streaming: StreamingTable {
        stream: KernelId::X86Avx2,
        bulk: KernelId::X86Avx512,
        bulk_sizeclass_threshold: THRESHOLD_AVX512,
    },
    parallel: ParallelTable {
        min_bytes: 65_536,
        min_chunks: 64,
        max_threads: 4,
        spawn_cost_bytes: 24_576,
        merge_cost_bytes: 131_072,
        bytes_per_core_small: 32_768,
        bytes_per_core_medium: 224_256,
        bytes_per_core_large: 1_010_688,
        small_limit_bytes: 1_048_576,
        medium_limit_bytes: 4_194_304,
    },
    // Cost-model values equal the `DEFAULT_PAR_*` constants; `max_threads` is 1.
    streaming_parallel: ParallelTable {
        min_bytes: 0,
        min_chunks: 0,
        max_threads: 1,
        spawn_cost_bytes: 24_576,
        merge_cost_bytes: 16_384,
        bytes_per_core_small: 262_144,
        bytes_per_core_medium: 131_072,
        bytes_per_core_large: 65_536,
        small_limit_bytes: 262_144,
        medium_limit_bytes: 2_097_152,
    },
};
/// Profile selected on x86_64 when `AVX512_READY` is present without any AMX capability.
///
/// Dispatch and streaming match `PROFILE_X86_AVX512_AMX`; only the `parallel` table
/// differs. The `parallel` numbers are literal values (their origin is not visible
/// here — treat them as tuned data and do not round them).
#[cfg(target_arch = "x86_64")]
pub static PROFILE_X86_AVX512: FamilyProfile = FamilyProfile {
    dispatch: DispatchTable {
        boundaries: [64, 1024, 4096],
        xs: KernelId::X86Avx512,
        s: KernelId::X86Avx512,
        m: KernelId::X86Avx512,
        l: KernelId::X86Avx512,
    },
    streaming: StreamingTable {
        stream: KernelId::X86Avx2,
        bulk: KernelId::X86Avx512,
        bulk_sizeclass_threshold: THRESHOLD_AVX512,
    },
    parallel: ParallelTable {
        min_bytes: 65_536,
        min_chunks: 64,
        max_threads: 8,
        spawn_cost_bytes: 24_576,
        merge_cost_bytes: 16_384,
        bytes_per_core_small: 24_576,
        bytes_per_core_medium: 107_520,
        bytes_per_core_large: 1_025_024,
        small_limit_bytes: 262_144,
        medium_limit_bytes: 2_097_152,
    },
    // Cost-model values equal the `DEFAULT_PAR_*` constants; `max_threads` is 1.
    streaming_parallel: ParallelTable {
        min_bytes: 0,
        min_chunks: 0,
        max_threads: 1,
        spawn_cost_bytes: 24_576,
        merge_cost_bytes: 16_384,
        bytes_per_core_small: 262_144,
        bytes_per_core_medium: 131_072,
        bytes_per_core_large: 65_536,
        small_limit_bytes: 262_144,
        medium_limit_bytes: 2_097_152,
    },
};
/// Profile selected on aarch64 when the NEON capability is present.
///
/// The `xs` and `s` classes use the portable kernel, `m` and `l` use NEON; streaming
/// uses the portable kernel with NEON as the bulk kernel.
///
/// NOTE(review): the middle boundary (4095) sits directly below the last one (4096), so
/// the `m` class spans at most one length — presumably a deliberate way to skip that
/// class; confirm against the dispatcher's boundary semantics.
#[cfg(target_arch = "aarch64")]
pub static PROFILE_AARCH64_NEON: FamilyProfile = FamilyProfile {
    dispatch: DispatchTable {
        boundaries: [64, 4095, 4096],
        xs: KernelId::Portable,
        s: KernelId::Portable,
        m: KernelId::Aarch64Neon,
        l: KernelId::Aarch64Neon,
    },
    streaming: StreamingTable {
        stream: KernelId::Portable,
        bulk: KernelId::Aarch64Neon,
        bulk_sizeclass_threshold: THRESHOLD_NEON,
    },
    parallel: ParallelTable {
        min_bytes: 65_536,
        min_chunks: 64,
        max_threads: 8,
        spawn_cost_bytes: 4_096,
        merge_cost_bytes: 4_096,
        bytes_per_core_small: 4_096,
        bytes_per_core_medium: 107_520,
        bytes_per_core_large: 1_025_024,
        small_limit_bytes: 262_144,
        medium_limit_bytes: 2_097_152,
    },
    // Cost-model values equal the `DEFAULT_PAR_*` constants; `max_threads` is 1.
    streaming_parallel: ParallelTable {
        min_bytes: 0,
        min_chunks: 0,
        max_threads: 1,
        spawn_cost_bytes: 24_576,
        merge_cost_bytes: 16_384,
        bytes_per_core_small: 262_144,
        bytes_per_core_medium: 131_072,
        bytes_per_core_large: 65_536,
        small_limit_bytes: 262_144,
        medium_limit_bytes: 2_097_152,
    },
};
/// Profile selected on s390x when `Z13_READY` is the highest readiness level present.
///
/// Uses the default-kind dispatch table, portable streaming with the s390x vector
/// kernel for bulk, and the generation-0 cost model with (256 KiB, 128 chunks, 8
/// threads) for both parallel tables.
#[cfg(target_arch = "s390x")]
pub static PROFILE_Z13: FamilyProfile = {
    // `ParallelTable` is `Copy`; both parallel slots share the same value.
    let parallel = scalar_profile_parallel(256 * 1024, 128, 8, 0);
    FamilyProfile {
        dispatch: default_kind_table(),
        streaming: StreamingTable {
            stream: KernelId::Portable,
            bulk: S390X_VECTOR_KERNEL,
            bulk_sizeclass_threshold: THRESHOLD_PORTABLE,
        },
        parallel,
        streaming_parallel: parallel,
    }
};
/// Profile selected on s390x when `Z14_READY` is present but `Z15_READY` is not.
///
/// Uses the default-kind dispatch table, portable streaming with the s390x vector
/// kernel for bulk, and the generation-1 cost model with (192 KiB, 96 chunks, 8
/// threads) for both parallel tables.
#[cfg(target_arch = "s390x")]
pub static PROFILE_Z14: FamilyProfile = {
    // `ParallelTable` is `Copy`; both parallel slots share the same value.
    let parallel = scalar_profile_parallel(192 * 1024, 96, 8, 1);
    FamilyProfile {
        dispatch: default_kind_table(),
        streaming: StreamingTable {
            stream: KernelId::Portable,
            bulk: S390X_VECTOR_KERNEL,
            bulk_sizeclass_threshold: THRESHOLD_PORTABLE,
        },
        parallel,
        streaming_parallel: parallel,
    }
};
/// Profile selected on s390x when `Z15_READY` is present.
///
/// The `xs` and `s` classes use the portable kernel, `m` and `l` use the s390x vector
/// kernel. The `parallel` numbers are literal values (their origin is not visible
/// here — treat them as tuned data and do not round them).
#[cfg(target_arch = "s390x")]
pub static PROFILE_Z15: FamilyProfile = FamilyProfile {
    dispatch: DispatchTable {
        boundaries: [64, 256, 4096],
        xs: KernelId::Portable,
        s: KernelId::Portable,
        m: S390X_VECTOR_KERNEL,
        l: S390X_VECTOR_KERNEL,
    },
    streaming: StreamingTable {
        stream: KernelId::Portable,
        bulk: S390X_VECTOR_KERNEL,
        bulk_sizeclass_threshold: THRESHOLD_PORTABLE,
    },
    parallel: ParallelTable {
        min_bytes: 65_536,
        min_chunks: 64,
        max_threads: 4,
        spawn_cost_bytes: 24_576,
        merge_cost_bytes: 16_384,
        bytes_per_core_small: 32_768,
        bytes_per_core_medium: 370_688,
        bytes_per_core_large: 65_536,
        small_limit_bytes: 262_144,
        medium_limit_bytes: 8_388_608,
    },
    // Cost-model values equal the `DEFAULT_PAR_*` constants; `max_threads` is 1.
    streaming_parallel: ParallelTable {
        min_bytes: 0,
        min_chunks: 0,
        max_threads: 1,
        spawn_cost_bytes: 24_576,
        merge_cost_bytes: 16_384,
        bytes_per_core_small: 262_144,
        bytes_per_core_medium: 131_072,
        bytes_per_core_large: 65_536,
        small_limit_bytes: 262_144,
        medium_limit_bytes: 2_097_152,
    },
};
/// Profile selected on powerpc64 when `POWER7_READY` is the highest readiness level
/// present.
///
/// Uses the default-kind dispatch table, portable streaming with the VSX kernel for
/// bulk, and the generation-0 cost model with (256 KiB, 128 chunks, 8 threads) for both
/// parallel tables.
#[cfg(target_arch = "powerpc64")]
pub static PROFILE_POWER7: FamilyProfile = {
    // `ParallelTable` is `Copy`; both parallel slots share the same value.
    let parallel = scalar_profile_parallel(256 * 1024, 128, 8, 0);
    FamilyProfile {
        dispatch: default_kind_table(),
        streaming: StreamingTable {
            stream: KernelId::Portable,
            bulk: POWER_VSX_KERNEL,
            bulk_sizeclass_threshold: THRESHOLD_PORTABLE,
        },
        parallel,
        streaming_parallel: parallel,
    }
};
/// Profile selected on powerpc64 when `POWER8_READY` is present but neither
/// `POWER9_READY` nor `POWER10_READY` is.
///
/// Uses the default-kind dispatch table, portable streaming with the VSX kernel for
/// bulk, and the generation-1 cost model with (192 KiB, 96 chunks, 8 threads) for both
/// parallel tables.
#[cfg(target_arch = "powerpc64")]
pub static PROFILE_POWER8: FamilyProfile = {
    // `ParallelTable` is `Copy`; both parallel slots share the same value.
    let parallel = scalar_profile_parallel(192 * 1024, 96, 8, 1);
    FamilyProfile {
        dispatch: default_kind_table(),
        streaming: StreamingTable {
            stream: KernelId::Portable,
            bulk: POWER_VSX_KERNEL,
            bulk_sizeclass_threshold: THRESHOLD_PORTABLE,
        },
        parallel,
        streaming_parallel: parallel,
    }
};
/// Profile selected on powerpc64 when `POWER9_READY` is present but `POWER10_READY` is
/// not.
///
/// Uses the default-kind dispatch table, portable streaming with the VSX kernel for
/// bulk, and the generation-3 cost model with (128 KiB, 64 chunks, 16 threads) for both
/// parallel tables.
///
/// NOTE(review): POWER7 and POWER8 use generations 0 and 1, and this profile jumps to 3;
/// no profile visible here uses generation 2. Presumably intentional tuning — confirm
/// it is not an off-by-one.
#[cfg(target_arch = "powerpc64")]
pub static PROFILE_POWER9: FamilyProfile = {
    // `ParallelTable` is `Copy`; both parallel slots share the same value.
    let parallel = scalar_profile_parallel(128 * 1024, 64, 16, 3);
    FamilyProfile {
        dispatch: default_kind_table(),
        streaming: StreamingTable {
            stream: KernelId::Portable,
            bulk: POWER_VSX_KERNEL,
            bulk_sizeclass_threshold: THRESHOLD_PORTABLE,
        },
        parallel,
        streaming_parallel: parallel,
    }
};
/// Profile selected on powerpc64 when `POWER10_READY` is present.
///
/// Only the `xs` class uses the portable kernel; `s`, `m` and `l` use VSX. The
/// `parallel` numbers are literal values (their origin is not visible here — treat them
/// as tuned data and do not round them).
#[cfg(target_arch = "powerpc64")]
pub static PROFILE_POWER10: FamilyProfile = FamilyProfile {
    dispatch: DispatchTable {
        boundaries: [64, 256, 4096],
        xs: KernelId::Portable,
        s: POWER_VSX_KERNEL,
        m: POWER_VSX_KERNEL,
        l: POWER_VSX_KERNEL,
    },
    streaming: StreamingTable {
        stream: KernelId::Portable,
        bulk: POWER_VSX_KERNEL,
        bulk_sizeclass_threshold: THRESHOLD_PORTABLE,
    },
    parallel: ParallelTable {
        min_bytes: 65_536,
        min_chunks: 64,
        max_threads: 4,
        spawn_cost_bytes: 24_576,
        merge_cost_bytes: 16_384,
        bytes_per_core_small: 32_768,
        bytes_per_core_medium: 370_688,
        bytes_per_core_large: 65_536,
        small_limit_bytes: 262_144,
        medium_limit_bytes: 8_388_608,
    },
    // Cost-model values equal the `DEFAULT_PAR_*` constants; `max_threads` is 1.
    streaming_parallel: ParallelTable {
        min_bytes: 0,
        min_chunks: 0,
        max_threads: 1,
        spawn_cost_bytes: 24_576,
        merge_cost_bytes: 16_384,
        bytes_per_core_small: 262_144,
        bytes_per_core_medium: 131_072,
        bytes_per_core_large: 65_536,
        small_limit_bytes: 262_144,
        medium_limit_bytes: 2_097_152,
    },
};
/// Returns `true` when `caps.has` succeeds for at least one of the AMX capabilities
/// (`AMX_TILE`, `AMX_INT8`, `AMX_BF16`, `AMX_FP16`, `AMX_COMPLEX`).
///
/// The capabilities are probed in that order and probing stops at the first hit.
#[cfg(target_arch = "x86_64")]
#[must_use]
#[inline]
fn has_any_amx(caps: Caps) -> bool {
    if caps.has(x86::AMX_TILE) {
        return true;
    }
    if caps.has(x86::AMX_INT8) {
        return true;
    }
    if caps.has(x86::AMX_BF16) {
        return true;
    }
    if caps.has(x86::AMX_FP16) {
        return true;
    }
    caps.has(x86::AMX_COMPLEX)
}
/// Picks the static `FamilyProfile` that matches the detected CPU capabilities.
///
/// Exactly one of the `cfg`-gated blocks below survives compilation and becomes the
/// function's tail expression. Within a block, capabilities are tested from the most
/// capable level downwards and the first match wins:
///
/// - x86_64: `AVX512_READY` with any AMX bit, then `AVX512_READY`, else the default-kind
///   profile.
/// - aarch64: `NEON`, else portable.
/// - s390x: `Z15_READY`, `Z14_READY`, `Z13_READY`, else portable.
/// - powerpc64: `POWER10_READY`, `POWER9_READY`, `POWER8_READY`, `POWER7_READY`, else
///   portable.
/// - riscv64: `V` selects the default-kind profile, else portable.
/// - any other architecture: always portable (`caps` is ignored).
#[must_use]
#[inline]
pub fn select_profile_for_caps(caps: Caps) -> &'static FamilyProfile {
    #[cfg(target_arch = "x86_64")]
    {
        // NOTE(review): the fallback profile names AVX2/SSE4.1 kernels without testing
        // `caps` for them, unlike the other architectures; presumably kernel resolution
        // downstream validates capabilities — confirm.
        if !caps.has(x86::AVX512_READY) {
            &PROFILE_DEFAULT_KIND
        } else if has_any_amx(caps) {
            &PROFILE_X86_AVX512_AMX
        } else {
            &PROFILE_X86_AVX512
        }
    }
    #[cfg(target_arch = "aarch64")]
    {
        if caps.has(aarch64::NEON) {
            &PROFILE_AARCH64_NEON
        } else {
            &PROFILE_PORTABLE
        }
    }
    #[cfg(target_arch = "s390x")]
    {
        if caps.has(s390x::Z15_READY) {
            &PROFILE_Z15
        } else if caps.has(s390x::Z14_READY) {
            &PROFILE_Z14
        } else if caps.has(s390x::Z13_READY) {
            &PROFILE_Z13
        } else {
            &PROFILE_PORTABLE
        }
    }
    #[cfg(target_arch = "powerpc64")]
    {
        if caps.has(power::POWER10_READY) {
            &PROFILE_POWER10
        } else if caps.has(power::POWER9_READY) {
            &PROFILE_POWER9
        } else if caps.has(power::POWER8_READY) {
            &PROFILE_POWER8
        } else if caps.has(power::POWER7_READY) {
            &PROFILE_POWER7
        } else {
            &PROFILE_PORTABLE
        }
    }
    #[cfg(target_arch = "riscv64")]
    {
        if caps.has(riscv::V) {
            &PROFILE_DEFAULT_KIND
        } else {
            &PROFILE_PORTABLE
        }
    }
    #[cfg(not(any(
        target_arch = "x86_64",
        target_arch = "aarch64",
        target_arch = "s390x",
        target_arch = "powerpc64",
        target_arch = "riscv64"
    )))]
    {
        // Nothing to inspect on this architecture; silence the unused-parameter lint.
        let _ = caps;
        &PROFILE_PORTABLE
    }
}
/// Returns the one-shot dispatch table of the profile chosen by
/// `select_profile_for_caps`.
#[must_use]
#[inline]
pub fn select_table_for_caps(caps: Caps) -> &'static DispatchTable {
    let profile = select_profile_for_caps(caps);
    &profile.dispatch
}
/// Returns the streaming table of the profile chosen by `select_profile_for_caps`.
#[must_use]
#[inline]
pub fn select_streaming_table_for_caps(caps: Caps) -> &'static StreamingTable {
    let profile = select_profile_for_caps(caps);
    &profile.streaming
}
/// Returns the `parallel` table of the profile chosen by `select_profile_for_caps`.
///
/// Only available with the `parallel` feature.
#[cfg(feature = "parallel")]
#[must_use]
#[inline]
pub fn select_parallel_table_for_caps(caps: Caps) -> &'static ParallelTable {
    let profile = select_profile_for_caps(caps);
    &profile.parallel
}
/// Returns the `streaming_parallel` table of the profile chosen by
/// `select_profile_for_caps`.
///
/// Only available with the `parallel` feature.
#[cfg(feature = "parallel")]
#[must_use]
#[inline]
pub fn select_streaming_parallel_table_for_caps(caps: Caps) -> &'static ParallelTable {
    let profile = select_profile_for_caps(caps);
    &profile.streaming_parallel
}