use std::sync::OnceLock;
/// Snapshot of host hardware characteristics used to derive IBD tuning
/// defaults (thread counts, chunk sizes). Detected once per process; see
/// `hardware_profile()`.
#[derive(Debug, Clone)]
pub struct IbdHardwareProfile {
    // Logical CPU count from `available_parallelism`, clamped to >= 1.
    pub num_threads: usize,
    // L3 cache size in KiB when detectable (Linux sysfs only); `None` elsewhere.
    pub l3_cache_kb: Option<u64>,
    // Convenience flag: true when `num_threads >= 16`.
    pub is_many_core: bool,
}
// Process-wide cache of the detected profile; initialized lazily and at most
// once by `hardware_profile()`.
static HARDWARE_PROFILE: OnceLock<IbdHardwareProfile> = OnceLock::new();
/// Probes the host once and builds an [`IbdHardwareProfile`].
///
/// Falls back to a single thread when the OS cannot report parallelism;
/// L3 detection is best-effort and may yield `None`.
fn detect_hardware() -> IbdHardwareProfile {
    let threads = match std::thread::available_parallelism() {
        Ok(p) => p.get(),
        Err(_) => 1,
    }
    .max(1); // defensive floor; `get()` is already nonzero on the Ok path
    IbdHardwareProfile {
        num_threads: threads,
        l3_cache_kb: detect_l3_cache_kb(),
        // 16 logical CPUs is the cutoff for "many-core" tuning defaults.
        is_many_core: threads >= 16,
    }
}
/// Best-effort L3 cache size in KiB, read from sysfs (Linux only).
///
/// Reads cpu0's `index3` cache entry; on common topologies that is the
/// shared L3. Returns `None` if the file is absent or unparsable.
#[cfg(target_os = "linux")]
fn detect_l3_cache_kb() -> Option<u64> {
    // `read_to_string(...).ok()?` already covers the missing-file case, so no
    // separate `exists()` check is needed (the old check was also racy).
    let raw = std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index3/size").ok()?;
    parse_cache_size_kb(&raw)
}

/// Parses a sysfs cache-size string (e.g. `"8192K"`, `"16M"`) into KiB.
///
/// A bare number is taken as KiB already. The previous implementation split
/// off the last character unconditionally, so `"8192"` parsed as `819`; this
/// version only strips a recognized unit suffix.
fn parse_cache_size_kb(s: &str) -> Option<u64> {
    let s = s.trim();
    let (digits, mult) = match s.chars().last()? {
        'K' | 'k' => (&s[..s.len() - 1], 1u64),
        'M' | 'm' => (&s[..s.len() - 1], 1024),
        'G' | 'g' => (&s[..s.len() - 1], 1024 * 1024),
        // No unit suffix: treat the whole string as a KiB count.
        _ => (s, 1),
    };
    digits.trim().parse::<u64>().ok()?.checked_mul(mult)
}
/// Non-Linux fallback: L3 detection is not implemented, so tuning relies on
/// thread-count heuristics alone.
#[cfg(not(target_os = "linux"))]
fn detect_l3_cache_kb() -> Option<u64> {
    None
}
/// Returns the process-wide hardware profile, detecting it on first use.
///
/// The first caller pays the probe cost; all later callers read the cache.
fn hardware_profile() -> &'static IbdHardwareProfile {
    // Fast path: already initialized.
    if let Some(profile) = HARDWARE_PROFILE.get() {
        return profile;
    }
    // Slow path: `get_or_init` still races correctly if another thread won.
    HARDWARE_PROFILE.get_or_init(detect_hardware)
}
// Algorithm-selection floors. NOTE(review): neither constant is referenced in
// this file — presumably batch-size cutoffs for Straus / Pippenger
// multi-scalar multiplication in callers; confirm at the use sites.
pub const STRAUSS_MIN: usize = 64;
pub const PIPPENGER_MIN_CHUNK: usize = 88;
/// Resolves the IBD chunk threshold.
///
/// Precedence: explicit `config_override` > `BLVM_IBD_CHUNK_THRESHOLD` env
/// var (accepted only in 1..=1024) > hardware-derived default (96 on
/// many-core hosts, else 128).
pub fn chunk_threshold_config_or_hardware(config_override: Option<usize>) -> usize {
    // Explicit config wins unconditionally (even the env var is not consulted).
    if let Some(v) = config_override {
        return v;
    }
    // Env override, ignored unless it parses and lands in the sane range.
    if let Ok(raw) = std::env::var("BLVM_IBD_CHUNK_THRESHOLD") {
        if let Ok(v) = raw.parse::<usize>() {
            if v > 0 && v <= 1024 {
                return v;
            }
        }
    }
    // Many-core hosts get a lower threshold so parallel paths engage sooner.
    if hardware_profile().is_many_core {
        96
    } else {
        128
    }
}
/// Resolves the minimum IBD chunk size.
///
/// Precedence: explicit `config_override` > `BLVM_IBD_MIN_CHUNK_SIZE` env var
/// (accepted only in 1..=512) > hardware-derived default (64 on many-core
/// hosts, else 128).
pub fn min_chunk_size_config_or_hardware(config_override: Option<usize>) -> usize {
    // Explicit config wins unconditionally (even the env var is not consulted).
    if let Some(v) = config_override {
        return v;
    }
    // Env override, ignored unless it parses and lands in the sane range.
    if let Ok(raw) = std::env::var("BLVM_IBD_MIN_CHUNK_SIZE") {
        if let Ok(v) = raw.parse::<usize>() {
            if v > 0 && v <= 512 {
                return v;
            }
        }
    }
    // Smaller minimum on many-core hosts so work splits across more threads.
    if hardware_profile().is_many_core {
        64
    } else {
        128
    }
}
/// Splits `0..n` into up to `num_chunks` contiguous half-open ranges.
///
/// The remainder is spread one element at a time over the leading chunks, so
/// chunk lengths differ by at most one; zero-length chunks are dropped (the
/// result may therefore hold fewer than `num_chunks` entries).
///
/// NOTE(review): `min_chunk` only feeds the debug assertion — it does not
/// influence the split; confirm that is intentional at the call sites.
pub fn compute_chunk_ranges(n: usize, num_chunks: usize, min_chunk: usize) -> Vec<(usize, usize)> {
    debug_assert!(num_chunks >= 1 && min_chunk >= 1);
    // Single-chunk fast path: one range covering everything (even when n == 0).
    if num_chunks == 1 {
        return vec![(0, n)];
    }
    let base = n / num_chunks;
    let extra = n % num_chunks; // first `extra` chunks get one more element
    let mut ranges = Vec::with_capacity(num_chunks);
    let mut cursor = 0;
    for idx in 0..num_chunks {
        let len = base + usize::from(idx < extra);
        if len == 0 {
            continue; // happens when n < num_chunks
        }
        ranges.push((cursor, cursor + len));
        cursor += len;
    }
    debug_assert_eq!(cursor, n, "ranges must tile 0..n exactly");
    ranges
}
/// Picks the hash-batch chunk size from the detected L3 cache.
///
/// Scales as one unit per 256 KiB of L3 and clamps to `8..=32`; when the L3
/// size is unknown the default is 16 (already inside the clamp range).
pub fn hash_batch_chunk_size() -> usize {
    match hardware_profile().l3_cache_kb {
        Some(kb) => ((kb / 256) as usize).clamp(8, 32),
        None => 16,
    }
}