blvm_primitives/
ibd_tuning.rs

//! IBD Hardware Tuning
//!
//! Derives batch verification and parallelization parameters from hardware
//! (CPU count, cache size). Used when config does not supply explicit overrides.
//!
//! Precedence: Config override (if set) > Environment variable (where supported) >
//! Hardware-derived > Hardcoded default
7
8use std::sync::OnceLock;
9
/// Snapshot of the host characteristics that IBD tuning defaults are derived from.
///
/// Within this module it is populated by `detect_hardware` the first time a
/// hardware-derived value is requested.
#[derive(Debug, Clone)]
pub struct IbdHardwareProfile {
    /// Thread count reported by `std::thread::available_parallelism()` (1 if unavailable).
    pub num_threads: usize,
    /// Size of the L3 cache in KB; `None` when it could not be determined.
    pub l3_cache_kb: Option<u64>,
    /// `true` when `num_threads` is 16 or more.
    pub is_many_core: bool,
}
20
21static HARDWARE_PROFILE: OnceLock<IbdHardwareProfile> = OnceLock::new();
22
23fn detect_hardware() -> IbdHardwareProfile {
24    let num_threads = std::thread::available_parallelism()
25        .map(|p| p.get())
26        .unwrap_or(1)
27        .max(1);
28
29    let l3_cache_kb = detect_l3_cache_kb();
30    let is_many_core = num_threads >= 16;
31
32    IbdHardwareProfile {
33        num_threads,
34        l3_cache_kb,
35        is_many_core,
36    }
37}
38
/// Parse a sysfs cache `size` string (e.g. `"8192K"`, `"16M"`) into a size in KB.
///
/// Accepts an unsigned decimal number followed by an optional unit suffix: none or
/// `K` (KB), `M` (MB) or `G` (GB), case-insensitive. Surrounding whitespace is ignored.
/// Returns `None` for empty input, a missing number, an unrecognized suffix, or a
/// value that would overflow `u64`.
// Only the Linux variant of `detect_l3_cache_kb` calls this; it is kept unconditional
// so the parsing logic can be unit-tested on every target.
#[cfg_attr(not(target_os = "linux"), allow(dead_code))]
fn parse_cache_size_kb(raw: &str) -> Option<u64> {
    let text = raw.trim();
    // Split at the first non-digit so that a suffix-less value keeps all of its digits.
    let digits_end = text
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(text.len());
    let (digits, suffix) = text.split_at(digits_end);
    let value: u64 = digits.parse().ok()?;
    let kb_per_unit: u64 = match suffix.trim() {
        "" | "K" | "k" => 1,
        "M" | "m" => 1024,
        "G" | "g" => 1024 * 1024,
        // Guessing a unit would silently produce a wrong size; report "unknown" instead.
        _ => return None,
    };
    value.checked_mul(kb_per_unit)
}

/// Detect the L3 cache size in KB on Linux by reading
/// `/sys/devices/system/cpu/cpu0/cache/index3/size`.
///
/// Returns `None` if the file is missing, unreadable, or not in a recognized format.
#[cfg(target_os = "linux")]
fn detect_l3_cache_kb() -> Option<u64> {
    // A missing file surfaces as a read error, so no separate existence check is needed.
    let raw = std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index3/size").ok()?;
    parse_cache_size_kb(&raw)
}

/// L3 cache detection is only implemented for Linux; other targets report "unknown".
#[cfg(not(target_os = "linux"))]
fn detect_l3_cache_kb() -> Option<u64> {
    None
}
64
65fn hardware_profile() -> &'static IbdHardwareProfile {
66    HARDWARE_PROFILE.get_or_init(detect_hardware)
67}
68
/// Smallest batch size for which libsecp256k1 uses Strauss.
///
/// Thresholds this module is tuned against: n < 64 uses `ecmult_multi_simple_var`
/// (slow), 64 <= n < 88 uses Strauss, and n >= 88 uses Pippenger.
pub const STRAUSS_MIN: usize = 64;
/// Smallest batch size for which libsecp256k1 uses Pippenger (chunks of 88+).
pub const PIPPENGER_MIN_CHUNK: usize = 88;
73
74/// Chunk threshold: single batch when n <= this. Above = split for parallelism.
75/// Precedence: config_override > env > hardware-derived > 128.
76/// Hardware-derived: many-core (16+) → 96 (more parallelism); few-core → 128.
77pub fn chunk_threshold_config_or_hardware(config_override: Option<usize>) -> usize {
78    config_override
79        .or_else(|| {
80            std::env::var("BLVM_IBD_CHUNK_THRESHOLD")
81                .ok()
82                .and_then(|s| s.parse().ok())
83                .filter(|&n: &usize| n > 0 && n <= 1024)
84        })
85        .unwrap_or_else(|| {
86            let p = hardware_profile();
87            if p.is_many_core {
88                96 // more parallelism for ECDSA split
89            } else {
90                128
91            }
92        })
93}
94
95/// Min chunk size when splitting for parallelism. 128+ uses Pippenger (2-3× faster).
96/// Precedence: config_override > env > hardware-derived > 128.
97/// Hardware-derived: many-core → 64 (Strauss threshold); few-core → 128.
98pub fn min_chunk_size_config_or_hardware(config_override: Option<usize>) -> usize {
99    config_override
100        .or_else(|| {
101            std::env::var("BLVM_IBD_MIN_CHUNK_SIZE")
102                .ok()
103                .and_then(|s| s.parse().ok())
104                .filter(|&n: &usize| n > 0 && n <= 512)
105        })
106        .unwrap_or_else(|| {
107            let p = hardware_profile();
108            if p.is_many_core {
109                64 // Strauss threshold; more chunks for parallelism
110            } else {
111                128
112            }
113        })
114}
115
/// Compute contiguous, balanced `(start, end)` ranges (end exclusive) for parallel batch
/// verification of `n` signatures.
///
/// At most `num_chunks` ranges are produced. The count is reduced as needed so that every
/// range holds at least `min_chunk` items whenever `n >= min_chunk`; when `n < min_chunk`
/// a single range is used. Range lengths differ by at most one, longer ranges first, and
/// together the ranges cover `0..n` exactly, in order.
///
/// * `num_chunks == 1` always yields the single range `(0, n)`, even for `n == 0`.
/// * Otherwise empty ranges are never emitted, so `n == 0` yields an empty vector.
/// * A `num_chunks` or `min_chunk` of 0 is treated as 1.
/// * Choosing a `min_chunk` below 64 lets chunks fall into the slower
///   `ecmult_multi_simple_var` path, though parallelism often still wins.
pub fn compute_chunk_ranges(n: usize, num_chunks: usize, min_chunk: usize) -> Vec<(usize, usize)> {
    let min_chunk = min_chunk.max(1);
    let requested = num_chunks.max(1);
    if requested == 1 {
        return vec![(0, n)];
    }
    // Cap the chunk count so that n / chunks >= min_chunk (one chunk when n < min_chunk).
    let chunks = requested.min((n / min_chunk).max(1));
    // Balanced split: the first `remainder` chunks carry one extra item.
    let base_size = n / chunks;
    let remainder = n % chunks;
    let mut ranges = Vec::with_capacity(chunks);
    let mut start = 0;
    for i in 0..chunks {
        let chunk_len = base_size + usize::from(i < remainder);
        if chunk_len > 0 {
            ranges.push((start, start + chunk_len));
            start += chunk_len;
        }
    }
    debug_assert_eq!(start, n);
    ranges
}
139
140/// Chunk size for batch hash operations (SHA256, HASH160). Cache-friendly, fits in L1.
141/// Hardware-derived from L3 when known (L3/256 clamped 8–32); otherwise 16.
142/// Used by simd_vectorization for batch hashing.
143pub fn hash_batch_chunk_size() -> usize {
144    let p = hardware_profile();
145    let from_l3 = p.l3_cache_kb.map(|kb| (kb / 256) as usize);
146    let derived = from_l3.unwrap_or(16);
147    derived.clamp(8, 32)
148}
blvm_primitives/ibd_tuning.rs

blvm_primitives/
ibd_tuning.rs