belt/core/
utils.rs

1//! Utility functions for BELT.
2
3use serde_json::Value;
4
5use crate::Result;
6use crate::sanitize::parser::ProductionStatistic;
7use std::collections::HashMap;
8use std::path::PathBuf;
9use std::{path::Path, time::Duration};
10
11// Structs & Impls
/// Ordering mode for runs; can be parsed from a string via [`std::str::FromStr`].
///
/// NOTE(review): how each variant actually schedules runs is decided by the
/// code consuming this enum, which is not visible in this file.
#[derive(Debug, Clone, Default)]
pub enum RunOrder {
    Sequential,
    Random,
    /// The `Default` value of this enum.
    #[default]
    Grouped,
}
19
20/// Get a RunOrder from a string
21impl std::str::FromStr for RunOrder {
22    type Err = String;
23
24    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
25        match s.to_lowercase().as_str() {
26            "sequential" => Ok(RunOrder::Sequential),
27            "random" => Ok(RunOrder::Random),
28            "grouped" => Ok(RunOrder::Grouped),
29            _ => Err(BenchmarkErrorKind::InvalidRunOrder {
30                input: s.to_string(),
31            }
32            .to_string()),
33        }
34    }
35}
36
37// Formatting related utilities
/// Renders a [`Duration`] as a short human-readable string.
///
/// Sub-second precision is discarded. The shape depends on the magnitude:
/// - under one minute: `"{seconds}s"`
/// - under one hour: `"{minutes}m{seconds}s"`
/// - otherwise: `"{hours}h{minutes}m"` (seconds are not shown)
pub fn format_duration(duration: Duration) -> String {
    let total_secs = duration.as_secs();

    // Split the whole-second count into its hour / minute / second parts once.
    let hours = total_secs / 3600;
    let within_hour = total_secs % 3600;
    let mins = within_hour / 60;
    let secs = within_hour % 60;

    match (hours, mins) {
        (0, 0) => format!("{total_secs}s"),
        (0, _) => format!("{mins}m{secs}s"),
        _ => format!("{hours}h{mins}m"),
    }
}
54
55pub fn process_items(obj: &Value, stat_type: &str, items_vec: &mut Vec<ProductionStatistic>) {
56    if let Some(items_obj) = obj.get("items").and_then(|x| x.as_object()) {
57        for (item_name, quality_map) in items_obj {
58            if let Some(qualities) = quality_map.as_object() {
59                for (quality, count_val) in qualities {
60                    let count = match count_val.as_f64() {
61                        Some(c) => c as f32,
62                        None => {
63                            tracing::error!(
64                                "Invalid count for {} {} {}: {:?}",
65                                stat_type,
66                                item_name,
67                                quality,
68                                count_val
69                            );
70                            0.0
71                        }
72                    };
73                    items_vec.push(ProductionStatistic {
74                        statistic_type: stat_type.to_string(),
75                        name: item_name.clone(),
76                        quality: Some(quality.clone()),
77                        count,
78                    });
79                }
80            }
81        }
82    }
83}
84
85pub fn process_fluids(obj: &Value, stat_type: &str, fluids_vec: &mut Vec<ProductionStatistic>) {
86    if let Some(fluids_obj) = obj.get("fluids").and_then(|x| x.as_object()) {
87        for (fluid_name, count_val) in fluids_obj {
88            let count = match count_val.as_f64() {
89                Some(c) => c as f32,
90                None => {
91                    tracing::error!(
92                        "Invalid count for fluid {stat_type} {fluid_name}: {count_val:?}"
93                    );
94                    0.0
95                }
96            };
97            fluids_vec.push(ProductionStatistic {
98                statistic_type: stat_type.to_string(),
99                name: fluid_name.clone(),
100                quality: None,
101                count,
102            });
103        }
104    }
105}
106
107// File related utilities
108/// Find save files in a given path
109pub fn find_save_files(saves_dir: &Path, pattern: Option<&str>) -> Result<Vec<PathBuf>> {
110    if !saves_dir.exists() {
111        return Err(BenchmarkErrorKind::SaveDirectoryNotFound {
112            path: saves_dir.to_path_buf(),
113        }
114        .into());
115    }
116
117    // If the given path is a file, check the extension and return
118    if saves_dir.is_file() {
119        if saves_dir.extension().is_some_and(|ext| ext == "zip") {
120            return Ok(vec![saves_dir.to_path_buf()]);
121        } else {
122            return Err(BenchmarkErrorKind::InvalidSaveFile {
123                path: saves_dir.to_path_buf(),
124                reason: "Save file is not a .zip".to_string(),
125            }
126            .into());
127        }
128    }
129
130    // Set up the whole pattern
131    let pattern = pattern.unwrap_or("*");
132    let search_pattern = saves_dir.join(format!("{pattern}.zip"));
133
134    // Search using the pattern
135    let saves: Vec<PathBuf> = glob::glob(search_pattern.to_string_lossy().as_ref())?
136        .filter_map(std::result::Result::ok)
137        .collect();
138
139    // If empty, return
140    if saves.is_empty() {
141        return Err(BenchmarkErrorKind::NoSaveFilesFound {
142            pattern: pattern.to_string(),
143            directory: saves_dir.to_path_buf(),
144        }
145        .into());
146    }
147
148    tracing::info!("Found {} save files", saves.len());
149    for save in &saves {
150        tracing::debug!("  - {}", save.file_name().unwrap().to_string_lossy());
151    }
152
153    Ok(saves)
154}
155
156/// Validate found save files
157pub fn validate_save_files(save_files: &[PathBuf]) -> Result<()> {
158    for save_file in save_files {
159        // Check if file exists
160        if !save_file.exists() {
161            return Err(BenchmarkErrorKind::InvalidSaveFile {
162                path: save_file.clone(),
163                reason: "File does not exist".to_string(),
164            }
165            .into());
166        }
167
168        // Check extension
169        if save_file.extension().is_none_or(|ext| ext != "zip") {
170            tracing::warn!(
171                "Save file {} does not have .zip extension",
172                save_file.display()
173            );
174        }
175    }
176
177    Ok(())
178}
179
180pub fn find_blueprint_files(blueprint_dir: &Path, pattern: Option<&str>) -> Result<Vec<PathBuf>> {
181    if !blueprint_dir.exists() {
182        return Err(BenchmarkErrorKind::BlueprintDirectoryNotFound {
183            path: blueprint_dir.to_path_buf(),
184        }
185        .into());
186    }
187
188    // If the given path is a file that is ok
189    if blueprint_dir.is_file() {
190        return Ok(vec![blueprint_dir.to_path_buf()]);
191    }
192
193    // Set up the whole pattern
194    let pattern = pattern.unwrap_or("*");
195    let search_pattern = blueprint_dir.join(pattern);
196
197    // Search using the pattern
198    let bps: Vec<PathBuf> = glob::glob(search_pattern.to_string_lossy().as_ref())?
199        .filter_map(std::result::Result::ok)
200        .collect();
201
202    // If empty, return
203    if bps.is_empty() {
204        return Err(BenchmarkErrorKind::NoBlueprintFilesFound {
205            pattern: pattern.to_string(),
206            directory: blueprint_dir.to_path_buf(),
207        }
208        .into());
209    }
210
211    tracing::info!("Found {} blueprint files", bps.len());
212    for bp in &bps {
213        tracing::debug!("  - {}", bp.file_name().unwrap().to_string_lossy());
214    }
215
216    Ok(bps)
217}
218
219/// Finds files pertaining to benchmark's output
220pub fn find_data_files(data_dir: &Path) -> Result<Vec<PathBuf>> {
221    if !data_dir.is_dir() {
222        return Err(BenchmarkErrorKind::DataDirectoryNotFound {
223            path: data_dir.to_path_buf(),
224        }
225        .into());
226    }
227
228    let search_pattern = data_dir.join("*.csv");
229    // Search using the pattern
230    let files: Vec<PathBuf> = glob::glob(search_pattern.to_string_lossy().as_ref())?
231        .filter_map(std::result::Result::ok)
232        .collect();
233
234    // If empty, return
235    if files.is_empty() {
236        return Err(BenchmarkErrorKind::NoDataFilesFound {
237            path: data_dir.to_path_buf(),
238        }
239        .into());
240    }
241
242    tracing::info!("Found {} data files", files.len());
243    for file in &files {
244        tracing::debug!("  - {}", file.file_name().unwrap().to_string_lossy());
245    }
246
247    Ok(files)
248}
249
250#[cfg(unix)]
251use std::fs;
252#[cfg(unix)]
253use std::os::unix::fs::PermissionsExt;
254
255use crate::benchmark::parser::BenchmarkResult;
256use crate::benchmark::runner::VerboseData;
257use crate::core::error::BenchmarkErrorKind;
258
/// Check if a file is an executable.
///
/// The check is platform specific:
/// - **Unix**: `path` is a file and at least one execute permission bit
///   (owner, group or other) is set.
/// - **Windows**: `path` is a file with an `exe` extension (case-insensitive).
/// - **Other platforms**: `path` is a file.
///
/// Returns `false` when the path does not exist or cannot be inspected.
pub fn is_executable(path: &Path) -> bool {
    // On unix, check the 'execute' permission bit
    #[cfg(unix)]
    {
        fs::metadata(path).is_ok_and(|metadata| {
            metadata.is_file() && (metadata.permissions().mode() & 0o111 != 0)
        })
    }

    #[cfg(windows)]
    {
        path.is_file()
            && path
                .extension()
                .is_some_and(|ext| ext.eq_ignore_ascii_case("exe"))
    }

    // Fallback for other operating systems: there is no portable notion of
    // an execute bit here, so any regular file is accepted.
    #[cfg(not(any(unix, windows)))]
    {
        path.is_file()
    }
}
283
284/// Check if the belt-sanitizer mod is active
285pub fn check_sanitizer() -> Option<PathBuf> {
286    get_default_user_data_dirs()
287        .iter()
288        .map(|base| base.join(PathBuf::from("script-output/belt")))
289        .find(|candidate| candidate.is_dir())
290}
291
292/// Check if the belt-sanitizer blueprint save file exists
293pub fn check_save_file(name: String) -> Option<PathBuf> {
294    get_default_user_data_dirs()
295        .iter()
296        .map(|base| base.join(format!("saves/{name}.zip")))
297        .find(|path| path.exists())
298}
299
300/// Find mod directory
301pub fn find_mod_directory() -> Option<PathBuf> {
302    get_default_user_data_dirs()
303        .iter()
304        .map(|base| base.join("mods"))
305        .find(|path| path.is_dir())
306}
307
308/// Tries to find [user data directory](https://wiki.factorio.com/Application_directory#User_data_directory)
309fn get_default_user_data_dirs() -> Vec<PathBuf> {
310    let mut paths = Vec::new();
311
312    let Some(home) = dirs::home_dir() else {
313        return paths;
314    };
315
316    if cfg!(target_os = "windows") {
317        paths.push(home.join("AppData/Roaming/Factorio"));
318    } else if cfg!(target_os = "linux") {
319        paths.push(home.join(".factorio"));
320        // Flatpak installations
321        paths.push(home.join(".var/app/com.valvesoftware.Steam/.factorio"));
322    } else if cfg!(target_os = "macos") {
323        paths.push(home.join("Library/Application Support/factorio"));
324    }
325
326    paths
327}
328
329// Math related utilities
330/// Calculate the base differences of a list of save's results.
331pub fn calculate_base_differences(results: &mut [BenchmarkResult]) {
332    // Calculate average effective_ups for each save
333    let avg_ups_per_save: Vec<f64> = results
334        .iter()
335        .map(|result| {
336            let total_ups: f64 = result.runs.iter().map(|run| run.effective_ups).sum();
337            total_ups / result.runs.len() as f64
338        })
339        .collect();
340
341    // Find the minimum average effective_ups across all saves
342    let min_avg_ups = avg_ups_per_save
343        .iter()
344        .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
345        .copied()
346        .unwrap_or(0.0);
347
348    // Calculate base_diff as percentage improvement for each run relative to the worst-performing save's average
349    for (result_idx, result) in results.iter_mut().enumerate() {
350        let save_avg_ups = avg_ups_per_save[result_idx];
351        let percentage_improvement = if min_avg_ups > 0.0 {
352            ((save_avg_ups - min_avg_ups) / min_avg_ups) * 100.0
353        } else {
354            0.0
355        };
356
357        for run in result.runs.iter_mut() {
358            run.base_diff = percentage_improvement;
359        }
360    }
361}
362
/// Calculate simple moving average.
///
/// Each output element is the mean of the current sample and up to
/// `window_size - 1` samples before it. Near the start, where fewer samples
/// are available, the mean is taken over what exists so far, so the output
/// always has the same length as `data`.
///
/// A `window_size` of `0` (or empty `data`) returns a copy of `data`.
pub fn calculate_sma(data: &[f64], window_size: u32) -> Vec<f64> {
    if data.is_empty() || window_size == 0 {
        return data.to_vec(); // No smoothing or no data
    }

    let window = window_size as usize;
    let mut running_sum: f64 = 0.0;

    data.iter()
        .enumerate()
        .map(|(idx, &sample)| {
            running_sum += sample;

            let samples_in_window = if idx >= window {
                // Drop the sample that just slid out of the window
                running_sum -= data[idx - window];
                window
            } else {
                idx + 1
            };

            running_sum / samples_in_window as f64
        })
        .collect()
}
393
/// Boxplot chart data as produced by [`calculate_boxplot_data`].
pub struct BoxplotData {
    /// One `[lower whisker, Q1, median, Q3, upper whisker]` row per plotted category.
    pub boxplot_values: Vec<Vec<f64>>,
    /// Outlier points, each stored as `[category index, value]`.
    pub outlier_values: Vec<Vec<f64>>,
    /// Save names used as category labels.
    pub category_names: Vec<String>,
    /// Smallest individual UPS value across all results (`0.0` when there are none).
    pub min_value: f64,
    /// Largest individual UPS value across all results (`0.0` when there are none).
    pub max_value: f64,
}
401
402/// Manually calculate the boxplot data given the benchmark results
403pub fn calculate_boxplot_data(results: &[BenchmarkResult]) -> BoxplotData {
404    // Collect save names
405    let save_names: Vec<String> = results
406        .iter()
407        .map(|result| result.save_name.clone())
408        .collect();
409
410    let mut grouped_boxplot_data: Vec<Vec<f64>> = Vec::new();
411    let mut outliers: Vec<(usize, f64)> = Vec::new();
412    let mut all_individual_ups: Vec<f64> = Vec::new();
413
414    // Iterate over every result and push UPS values
415    for result in results {
416        let mut values: Vec<f64> = result.runs.iter().map(|run| run.effective_ups).collect();
417        values.sort_by(|a, b| a.partial_cmp(b).unwrap());
418
419        all_individual_ups.extend(&values);
420        grouped_boxplot_data.push(values);
421    }
422
423    // Calculate boxplot statistics manually
424    let mut boxplot_data: Vec<Vec<f64>> = Vec::new();
425
426    for (category_idx, values) in grouped_boxplot_data.iter().enumerate() {
427        if values.is_empty() {
428            continue;
429        };
430
431        let len = values.len();
432        let q1_idx = len / 4;
433        let q2_idx = len / 2;
434        let q3_idx = (3 * len) / 4;
435
436        let q1 = values[q1_idx];
437        let q2 = values[q2_idx]; // median
438        let q3 = values[q3_idx];
439        let iqr = q3 - q1;
440
441        let lower_fence = q1 - 1.5 * iqr;
442        let upper_fence = q3 + 1.5 * iqr;
443
444        // Find whiskers (actual min/max within fences)
445        let lower_whisker = values
446            .iter()
447            .find(|&&v| v >= lower_fence)
448            .unwrap_or(&values[0]);
449        let upper_whisker = values
450            .iter()
451            .rev()
452            .find(|&&v| v <= upper_fence)
453            .unwrap_or(&values[len - 1]);
454
455        // Collect outliers
456        for &value in values {
457            if value < lower_fence || value > upper_fence {
458                outliers.push((category_idx, value));
459            }
460        }
461
462        // Boxplot data format: [min, Q1, median, Q3, max]
463        boxplot_data.push(vec![*lower_whisker, q1, q2, q3, *upper_whisker]);
464    }
465
466    let min_ups = all_individual_ups
467        .iter()
468        .cloned()
469        .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
470        .unwrap_or(0.0);
471
472    let max_ups = all_individual_ups
473        .iter()
474        .cloned()
475        .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
476        .unwrap_or(0.0);
477
478    // Convert outliers to scatter data
479    let scatter_data: Vec<Vec<f64>> = outliers
480        .into_iter()
481        .map(|(category, value)| vec![category as f64, value])
482        .collect();
483
484    BoxplotData {
485        boxplot_values: boxplot_data,
486        outlier_values: scatter_data,
487        category_names: save_names,
488        min_value: min_ups,
489        max_value: max_ups,
490    }
491}
492
493/// Compute global min/max for each metric across all saves and runs
494pub fn compute_global_metric_bounds(
495    all_verbose_data: &[VerboseData],
496    metrics_to_chart: &[String],
497    smooth_window: u32,
498) -> HashMap<String, (f64, f64)> {
499    let mut bounds: HashMap<String, (f64, f64)> = HashMap::new();
500
501    if all_verbose_data.is_empty() {
502        return bounds;
503    }
504
505    let mut reader = csv::Reader::from_reader(all_verbose_data[0].csv_data.as_bytes());
506    let headers: Vec<String> = reader
507        .headers()
508        .unwrap()
509        .iter()
510        .map(|s| s.to_string())
511        .collect();
512    let header_map: HashMap<String, usize> = headers
513        .clone()
514        .into_iter()
515        .enumerate()
516        .map(|(i, h)| (h, i))
517        .collect();
518
519    for metric_name in metrics_to_chart {
520        let mut all_smoothed_ns: Vec<f64> = Vec::new();
521
522        if let Some(&column_index) = header_map.get(metric_name) {
523            for run_data in all_verbose_data {
524                let mut inner_reader = csv::Reader::from_reader(run_data.csv_data.as_bytes());
525                let mut current_run_raw_values_ns: Vec<f64> = Vec::new();
526
527                for record_result in inner_reader.records() {
528                    let record = record_result.unwrap();
529                    if let Some(value_ns_str) = record.get(column_index)
530                        && let Ok(value_ns) = value_ns_str.parse::<f64>()
531                    {
532                        current_run_raw_values_ns.push(value_ns);
533                    }
534                }
535                let smoothed_run_values_ns =
536                    calculate_sma(&current_run_raw_values_ns, smooth_window);
537                all_smoothed_ns.extend(smoothed_run_values_ns);
538            }
539        }
540
541        if !all_smoothed_ns.is_empty() {
542            let n = all_smoothed_ns.len() as f64;
543            let mean = all_smoothed_ns.iter().sum::<f64>() / n;
544            let stddev = (all_smoothed_ns
545                .iter()
546                .map(|x| (x - mean).powi(2))
547                .sum::<f64>()
548                / n)
549                .sqrt();
550
551            let min_ns = (mean - 2.0 * stddev).max(0.0);
552            let max_ns = mean + 2.0 * stddev;
553
554            let min_ms = min_ns / 1_000_000.0;
555            let max_ms = max_ns / 1_000_000.0;
556
557            let (min_ms, max_ms) = if min_ms == max_ms {
558                let new_min = (min_ms * 0.9).max(0.0);
559                let new_max = (max_ms * 1.1).max(0.1);
560                (new_min, new_max)
561            } else {
562                (min_ms, max_ms)
563            };
564
565            bounds.insert(metric_name.clone(), (min_ms, max_ms));
566        }
567    }
568
569    bounds
570}
571
/// Round a tick count up to the next multiple of its precision window.
///
/// The window is the largest of the thresholds below that `ticks` has
/// reached; the constant names assume 60 ticks per second. Tick counts below
/// one minute use a five-second window.
///
/// | `ticks` at least | window (ticks) |
/// |------------------|----------------|
/// | 250 hours        | 54,000,000     |
/// | 50 hours         | 10,800,000     |
/// | 10 hours         | 2,160,000      |
/// | 1 hour           | 216,000        |
/// | 10 minutes       | 36,000         |
/// | 1 minute         | 3,600          |
/// | (otherwise)      | 300            |
///
/// If the rounded value does not fit in a `u32`, the result saturates at
/// `u32::MAX` instead of overflowing.
pub fn round_to_precision_window(ticks: u32) -> u32 {
    const FIVE_SECONDS: u32 = 300;
    const ONE_MINUTE: u32 = 3600;
    const TEN_MINUTES: u32 = 36000;
    const ONE_HOUR: u32 = 216000;
    const TEN_HOURS: u32 = 2160000;
    const FIFTY_HOURS: u32 = 10800000;
    const TWO_FIFTY_HOURS: u32 = 54000000;

    // Thresholds from largest to smallest; the first one reached is the window
    const WINDOWS: [u32; 6] = [
        TWO_FIFTY_HOURS,
        FIFTY_HOURS,
        TEN_HOURS,
        ONE_HOUR,
        TEN_MINUTES,
        ONE_MINUTE,
    ];

    let window = WINDOWS
        .iter()
        .copied()
        .find(|&threshold| ticks >= threshold)
        .unwrap_or(FIVE_SECONDS);

    // Round up to nearest multiple of window. Near `u32::MAX` the next
    // multiple is not representable, so saturate rather than overflow.
    ticks.div_ceil(window).saturating_mul(window)
}
601
/// Get operating system info.
///
/// Returns the compile-time target as `"{os}-{arch}"`, built from
/// [`std::env::consts::OS`] and [`std::env::consts::ARCH`].
pub fn get_os_info() -> String {
    use std::env::consts::{ARCH, OS};

    [OS, ARCH].join("-")
}