use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::time::Instant;
/// Fallback parallel threshold in bytes (64 KiB).
///
/// Used by `ParallelConfig::default()` and as the starting value in
/// `calibrate()` before any measurement has been taken.
pub const DEFAULT_PARALLEL_THRESHOLD_BYTES: u64 = 64 * 1024;
/// Settings that decide when hashing should be parallelised.
///
/// Serialisable with serde; it is read from and written to the `parallel`
/// field of `crate::config::BlazeConfig`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ParallelConfig {
/// Mean file size in bytes at or above which `should_parallelize` returns `true`.
pub parallel_threshold_bytes: u64,
/// Date string recorded by `calibrate()` (`YYYY-MM-DD`); empty in the default config.
pub calibrated_at: String,
/// CPU description recorded by `calibrate()`; empty in the default config.
pub cpu_info: String,
}
impl Default for ParallelConfig {
fn default() -> Self {
Self {
parallel_threshold_bytes: DEFAULT_PARALLEL_THRESHOLD_BYTES,
calibrated_at: String::new(),
cpu_info: String::new(),
}
}
}
impl ParallelConfig {
    /// Returns `true` when `mean_file_bytes` is at least the configured
    /// `parallel_threshold_bytes`.
    pub fn should_parallelize(&self, mean_file_bytes: u64) -> bool {
        self.parallel_threshold_bytes <= mean_file_bytes
    }

    /// Returns the path reported by `crate::config::config_path()`.
    pub fn config_path() -> PathBuf {
        let location = crate::config::config_path();
        location
    }

    /// Loads the `BlazeConfig` at `path` and returns its `parallel` section,
    /// or `ParallelConfig::default()` when that section is absent.
    pub fn load_or_default(path: &Path) -> Self {
        let loaded = crate::config::BlazeConfig::load(path);
        match loaded.parallel {
            Some(parallel) => parallel,
            None => Self::default(),
        }
    }

    /// Stores a copy of `self` as the `parallel` section of the `BlazeConfig`
    /// loaded from `path`, then saves that configuration back to `path`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `BlazeConfig::save` reports.
    pub fn save(&self, path: &Path) -> Result<()> {
        let mut merged = crate::config::BlazeConfig::load(path);
        merged.parallel = Some(self.clone());
        merged.save(path)
    }
}
/// Runs `pass` `runs` times back to back and returns the mean wall-clock time
/// per run in whole microseconds (a `runs` of zero is treated as one).
fn mean_micros_per_run(runs: usize, mut pass: impl FnMut()) -> u128 {
    let started = Instant::now();
    for _ in 0..runs {
        pass();
    }
    started.elapsed().as_micros() / runs.max(1) as u128
}

/// Benchmarks sequential against rayon-parallel SHA-256 hashing of in-memory
/// buffers and derives the parallel threshold from the measurements.
///
/// For each test size, in ascending order from 4 KiB to 4 MiB, 64 views of one
/// synthetic buffer are hashed: two untimed warm-up rounds (sequential then
/// parallel), followed by five timed sequential passes and five timed parallel
/// passes. The first size at which the parallel mean is more than 10 % below
/// the sequential mean becomes the threshold, and the search stops there.
/// Progress is printed to stderr.
///
/// If no size meets that bar, the threshold stays at
/// `DEFAULT_PARALLEL_THRESHOLD_BYTES`.
///
/// Every hash result is passed through `std::hint::black_box` so the optimiser
/// is discouraged from discarding the work that is being timed.
///
/// Returns a `ParallelConfig` stamped with today's date and a CPU description.
pub fn calibrate() -> ParallelConfig {
    use crate::algorithm::{hash_bytes, Algorithm};
    use rayon::prelude::*;
    use std::hint::black_box;

    let test_sizes: &[u64] = &[
        4 * 1024,
        16 * 1024,
        64 * 1024,
        256 * 1024,
        1024 * 1024,
        4 * 1024 * 1024,
    ];
    let n_files = 64usize;
    let warmup = 2usize;
    let timed = 5usize;
    // Kept as the result when parallel hashing never wins by the required margin.
    let mut threshold = DEFAULT_PARALLEL_THRESHOLD_BYTES;
    eprintln!("[*] Calibrating parallel threshold ({n_files} files × each size, {timed} runs)...");

    for &size in test_sizes {
        // Repeating 0..=255 byte pattern; the `as u8` truncation is intentional.
        let data: Vec<u8> = (0..size as usize).map(|i| i as u8).collect();
        // All "files" are views of the same buffer, so no extra memory is copied.
        let files: Vec<&[u8]> = vec![data.as_slice(); n_files];

        let hash_seq = || {
            files
                .iter()
                .for_each(|f| drop(black_box(hash_bytes(Algorithm::Sha256, f))));
        };
        let hash_par = || {
            files
                .par_iter()
                .for_each(|f| drop(black_box(hash_bytes(Algorithm::Sha256, f))));
        };

        // Untimed rounds so that neither timed path pays one-off start-up costs.
        for _ in 0..warmup {
            hash_seq();
            hash_par();
        }

        let seq_us = mean_micros_per_run(timed, hash_seq);
        let par_us = mean_micros_per_run(timed, hash_par);

        let size_kib = size / 1024;
        eprintln!(
            "    {size_kib} KiB × {n_files}: seq={seq_us}µs  par={par_us}µs{}",
            if par_us < seq_us {
                "  ← parallel wins"
            } else {
                ""
            }
        );

        // Require parallel to be more than 10 % faster; a zero reading is
        // treated as too coarse to trust.
        if par_us > 0 && par_us * 10 < seq_us * 9 {
            threshold = size;
            break;
        }
    }

    eprintln!("[+] Threshold set to {} KiB/file", threshold / 1024);
    ParallelConfig {
        parallel_threshold_bytes: threshold,
        calibrated_at: today_iso8601(),
        cpu_info: cpu_info_string(),
    }
}
/// Calibrates and stores the result at `ParallelConfig::config_path()`.
///
/// # Errors
///
/// Propagates any error from `calibrate_and_save_to`.
pub fn calibrate_and_save() -> Result<()> {
    let target = ParallelConfig::config_path();
    calibrate_and_save_to(&target)
}
/// Runs `calibrate()`, saves the resulting configuration to `path`, and
/// reports the destination on stderr.
///
/// # Errors
///
/// Returns the error from `ParallelConfig::save` if saving fails; in that
/// case the confirmation line is not printed.
pub fn calibrate_and_save_to(path: &Path) -> Result<()> {
    calibrate().save(path)?;
    eprintln!("[+] Parallel config written to {}", path.display());
    Ok(())
}
/// Converts a count of days since 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple, with month in `1..=12` and day in `1..=31`.
fn civil_from_unix_days(days: u64) -> (u64, u64, u64) {
    // Shift the epoch to 0000-03-01 so the leap day is the last day of the
    // (March-based) year and every 400-year era has exactly 146_097 days.
    let shifted = days + 719_468;
    let era = shifted / 146_097;
    let day_of_era = shifted % 146_097; // 0..=146_096
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // 0..=399
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // 0..=365
    let march_month = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February
    let day = day_of_year - (153 * march_month + 2) / 5 + 1;
    let month = if march_month < 10 {
        march_month + 3
    } else {
        march_month - 9
    };
    // January and February belong to the following calendar year.
    let year = year_of_era + era * 400 + u64::from(month <= 2);
    (year, month, day)
}

/// Returns the current UTC calendar date formatted as `YYYY-MM-DD`.
///
/// If the system clock reads earlier than the Unix epoch the elapsed time is
/// taken as zero, which yields `1970-01-01`.
fn today_iso8601() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let (year, month, day) = civil_from_unix_days(secs / 86_400);
    format!("{year:04}-{month:02}-{day:02}")
}
/// Returns a short description of the host CPU.
///
/// On macOS this is the trimmed output of `sysctl -n machdep.cpu.brand_string`
/// when the command runs and prints something. On Linux it is the trimmed
/// value of the first `model name` line in `/proc/cpuinfo` that contains a
/// colon. In every other case the compile-time target architecture
/// (`std::env::consts::ARCH`) is returned.
fn cpu_info_string() -> String {
    #[cfg(target_os = "macos")]
    {
        let brand = std::process::Command::new("sysctl")
            .args(["-n", "machdep.cpu.brand_string"])
            .output()
            .ok()
            .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
            .filter(|name| !name.is_empty());
        if let Some(name) = brand {
            return name;
        }
    }

    #[cfg(target_os = "linux")]
    {
        if let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") {
            let model = cpuinfo
                .lines()
                .filter(|entry| entry.starts_with("model name"))
                .find_map(|entry| entry.split_once(':'))
                .map(|(_, value)| value.trim().to_string());
            if let Some(name) = model {
                return name;
            }
        }
    }

    std::env::consts::ARCH.to_string()
}