Skip to main content

hexz_cli/cmd/data/
predict.rs

1//! Predict compression and deduplication potential.
2
3use anyhow::Result;
4use hexz_ops::predict::{PredictConfig, predict};
5use indicatif::HumanBytes;
6use std::path::PathBuf;
7use colored::Colorize;
8
9/// Execute the `hexz predict` command to estimate compression and deduplication potential.
10pub fn run(
11    path: PathBuf,
12    block_size: u32,
13    min_chunk: Option<u32>,
14    avg_chunk: Option<u32>,
15    max_chunk: Option<u32>,
16    json: bool,
17) -> Result<()> {
18    let config = PredictConfig {
19        path,
20        block_size: block_size as usize,
21        min_chunk,
22        avg_chunk,
23        max_chunk,
24        ..Default::default()
25    };
26
27    let report = predict(&config)?;
28
29    if json {
30        println!("{}", serde_json::to_string_pretty(&report)?);
31        return Ok(());
32    }
33
34    println!("{} Prediction   {}", "╭".dimmed(), report.file_path.cyan());
35    println!("{} Size         {}", "│".dimmed(), HumanBytes(report.file_size).to_string().green());
36    println!("{} Block Size   {}", "╰".dimmed(), HumanBytes(report.block_size as u64).to_string().bright_black());
37
38    println!("\n  {} Statistics:", "→".yellow());
39    println!("    {} Zero Blocks    {:.1}%", "→".dimmed(), report.zero_block_pct * 100.0);
40    println!("    {} Mean Entropy   {:.2} bits/byte", "→".dimmed(), report.mean_entropy);
41    println!("    {} High Entropy   {:.1}%", "→".dimmed(), report.high_entropy_pct * 100.0);
42
43    println!("\n  {} Estimation:", "→".yellow());
44    println!(
45        "    {} LZ4            {:.1}x  ({})",
46        "→".dimmed(),
47        if report.lz4_ratio > 0.0 { 1.0 / report.lz4_ratio } else { 0.0 },
48        HumanBytes(report.estimated_lz4_size).to_string().bright_black()
49    );
50    println!(
51        "    {} Zstd (lvl 3)   {:.1}x  ({})",
52        "→".dimmed(),
53        if report.zstd_ratio > 0.0 { 1.0 / report.zstd_ratio } else { 0.0 },
54        HumanBytes(report.estimated_zstd_size).to_string().bright_black()
55    );
56
57    println!("\n  {} Deduplication:", "→".yellow());
58    println!("    {} Fixed Dedup    {:.1}% savings", "→".dimmed(), report.fixed_dedup_savings_pct);
59    println!("    {} CDC Dedup      {:.1}% savings", "→".dimmed(), report.cdc_dedup_savings_pct);
60
61    println!("\n  {} Combined Best:", "→".yellow());
62    println!(
63        "    {} Zstd + CDC     {}  ({:.1}% reduction)",
64        "→".dimmed(),
65        HumanBytes(report.estimated_packed_size_zstd_cdc).to_string().green(),
66        report.overall_best_savings_pct
67    );
68
69    println!("\n  {} Recommendation:", "→".yellow());
70    // Recommendation logic...
71    if report.overall_best_savings_pct > 10.0 {
72        if report.cdc_dedup_savings_pct > 1.0 {
73            println!("    {} Use {} with {} blocks and {} algorithm", "→".dimmed(), "CDC packing".cyan(), "zstd".magenta(), "zstd".magenta());
74        } else {
75            println!("    {} Use {} with {} algorithm", "→".dimmed(), "standard packing".cyan(), "zstd".magenta());
76        }
77    } else if report.overall_best_savings_pct > 1.0 {
78        println!("    {} Use {} with {} algorithm", "→".dimmed(), "standard packing".cyan(), "lz4".magenta());
79    } else {
80        println!("    {} Data is mostly incompressible.", "→".dimmed());
81    }
82
83    Ok(())
84}