// blvm_primitives/ibd_tuning.rs
//
// Hardware-derived tuning knobs for IBD work-splitting. Values can be
// overridden per-call (config) or via environment variables; otherwise
// they are derived from a lazily detected hardware profile.

use std::sync::OnceLock;
9
/// Snapshot of host hardware characteristics used to tune IBD
/// work-splitting heuristics. Detected once and cached (see the
/// module's lazily initialized profile).
#[derive(Debug, Clone)]
pub struct IbdHardwareProfile {
    /// Number of available hardware threads, as reported by the OS.
    pub num_threads: usize,
    /// L3 cache size in KiB, if it could be detected (Linux sysfs only;
    /// `None` elsewhere or when detection fails).
    pub l3_cache_kb: Option<u64>,
    /// Convenience flag for "many-core" hosts; set from `num_threads`
    /// at detection time.
    pub is_many_core: bool,
}
20
21static HARDWARE_PROFILE: OnceLock<IbdHardwareProfile> = OnceLock::new();
22
23fn detect_hardware() -> IbdHardwareProfile {
24 let num_threads = std::thread::available_parallelism()
25 .map(|p| p.get())
26 .unwrap_or(1)
27 .max(1);
28
29 let l3_cache_kb = detect_l3_cache_kb();
30 let is_many_core = num_threads >= 16;
31
32 IbdHardwareProfile {
33 num_threads,
34 l3_cache_kb,
35 is_many_core,
36 }
37}
38
/// Parses a sysfs cache-size string (e.g. `"512K"`, `"8M"`, `"32768"`)
/// into KiB. Returns `None` on empty or unparseable input.
///
/// Bug fixed vs. the previous version: the old code unconditionally
/// split off the last character as a "suffix", so an unsuffixed value
/// like `"32768"` lost its final digit and parsed as `3276`. A bare
/// number is now treated as a KiB count, and the `G` suffix is handled.
#[allow(dead_code)] // only called on Linux, but kept cfg-free so it is testable everywhere
fn parse_cache_size_kb(raw: &str) -> Option<u64> {
    let s = raw.trim();
    if s.is_empty() {
        return None;
    }
    // (digits, multiplier-to-KiB) depending on the trailing unit.
    let (digits, mult) = if let Some(d) = s.strip_suffix('K').or_else(|| s.strip_suffix('k')) {
        (d, 1u64)
    } else if let Some(d) = s.strip_suffix('M').or_else(|| s.strip_suffix('m')) {
        (d, 1024)
    } else if let Some(d) = s.strip_suffix('G').or_else(|| s.strip_suffix('g')) {
        (d, 1024 * 1024)
    } else {
        // No recognized unit: interpret the whole string as KiB.
        (s, 1)
    };
    let n: u64 = digits.trim().parse().ok()?;
    n.checked_mul(mult) // guard against absurd values overflowing u64
}

/// Reads the L3 cache size for cpu0 from sysfs, in KiB.
/// Returns `None` if the file is missing or unparseable.
#[cfg(target_os = "linux")]
fn detect_l3_cache_kb() -> Option<u64> {
    // read_to_string fails with Err when the path doesn't exist, so no
    // separate existence check is needed.
    let raw = std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index3/size").ok()?;
    parse_cache_size_kb(&raw)
}

/// Non-Linux fallback: L3 size detection is not implemented.
#[cfg(not(target_os = "linux"))]
fn detect_l3_cache_kb() -> Option<u64> {
    None
}
64
65fn hardware_profile() -> &'static IbdHardwareProfile {
66 HARDWARE_PROFILE.get_or_init(detect_hardware)
67}
68
/// Minimum batch size for the Strauss path (consumer not visible in
/// this file — presumably a multi-scalar-multiplication dispatcher).
pub const STRAUSS_MIN: usize = 64;
/// Minimum per-chunk size for the Pippenger path (consumer not visible
/// in this file).
pub const PIPPENGER_MIN_CHUNK: usize = 88;
73
74pub fn chunk_threshold_config_or_hardware(config_override: Option<usize>) -> usize {
78 config_override
79 .or_else(|| {
80 std::env::var("BLVM_IBD_CHUNK_THRESHOLD")
81 .ok()
82 .and_then(|s| s.parse().ok())
83 .filter(|&n: &usize| n > 0 && n <= 1024)
84 })
85 .unwrap_or_else(|| {
86 let p = hardware_profile();
87 if p.is_many_core {
88 96 } else {
90 128
91 }
92 })
93}
94
95pub fn min_chunk_size_config_or_hardware(config_override: Option<usize>) -> usize {
99 config_override
100 .or_else(|| {
101 std::env::var("BLVM_IBD_MIN_CHUNK_SIZE")
102 .ok()
103 .and_then(|s| s.parse().ok())
104 .filter(|&n: &usize| n > 0 && n <= 512)
105 })
106 .unwrap_or_else(|| {
107 let p = hardware_profile();
108 if p.is_many_core {
109 64 } else {
111 128
112 }
113 })
114}
115
/// Splits `0..n` into up to `num_chunks` contiguous, non-overlapping
/// half-open ranges `(start, end)`. The first `n % num_chunks` chunks
/// get one extra element; zero-length chunks are omitted (so fewer than
/// `num_chunks` ranges may be returned).
///
/// With `num_chunks == 1` the whole range `(0, n)` is returned as-is,
/// even when `n == 0`.
///
/// NOTE(review): `min_chunk` is only used in the debug assertion below;
/// it does not influence the split — confirm whether that is intended.
pub fn compute_chunk_ranges(n: usize, num_chunks: usize, min_chunk: usize) -> Vec<(usize, usize)> {
    debug_assert!(num_chunks >= 1 && min_chunk >= 1);
    if num_chunks == 1 {
        return vec![(0, n)];
    }
    let quot = n / num_chunks;
    let rem = n % num_chunks;
    let mut out = Vec::with_capacity(num_chunks);
    let mut cursor = 0usize;
    for idx in 0..num_chunks {
        // First `rem` chunks absorb the remainder, one element each.
        let len = quot + usize::from(idx < rem);
        if len == 0 {
            continue;
        }
        out.push((cursor, cursor + len));
        cursor += len;
    }
    debug_assert_eq!(cursor, n);
    out
}
139
140pub fn hash_batch_chunk_size() -> usize {
144 let p = hardware_profile();
145 let from_l3 = p.l3_cache_kb.map(|kb| (kb / 256) as usize);
146 let derived = from_l3.unwrap_or(16);
147 derived.clamp(8, 32)
148}