Skip to main content

hexz_cli/cmd/data/
predict.rs

1//! Predict compression and deduplication potential.
2
3use anyhow::Result;
4use colored::Colorize;
5use hexz_ops::predict::{PredictConfig, predict};
6use indicatif::HumanBytes;
7use std::path::PathBuf;
8
9/// Execute the `hexz predict` command to estimate compression and deduplication potential.
10pub fn run(
11    path: PathBuf,
12    block_size: u32,
13    min_chunk: Option<u32>,
14    avg_chunk: Option<u32>,
15    max_chunk: Option<u32>,
16    json: bool,
17) -> Result<()> {
18    let config = PredictConfig {
19        path,
20        block_size: block_size as usize,
21        min_chunk,
22        avg_chunk,
23        max_chunk,
24        ..Default::default()
25    };
26
27    let report = predict(&config)?;
28
29    if json {
30        println!("{}", serde_json::to_string_pretty(&report)?);
31        return Ok(());
32    }
33
34    println!("{} Prediction   {}", "╭".dimmed(), report.file_path.cyan());
35    println!(
36        "{} Size         {}",
37        "│".dimmed(),
38        HumanBytes(report.file_size).to_string().green()
39    );
40    println!(
41        "{} Block Size   {}",
42        "╰".dimmed(),
43        HumanBytes(report.block_size as u64)
44            .to_string()
45            .bright_black()
46    );
47
48    println!("\n  {} Statistics:", "→".yellow());
49    println!(
50        "    {} Zero Blocks    {:.1}%",
51        "→".dimmed(),
52        report.zero_block_pct * 100.0
53    );
54    println!(
55        "    {} Mean Entropy   {:.2} bits/byte",
56        "→".dimmed(),
57        report.mean_entropy
58    );
59    println!(
60        "    {} High Entropy   {:.1}%",
61        "→".dimmed(),
62        report.high_entropy_pct * 100.0
63    );
64
65    println!("\n  {} Estimation:", "→".yellow());
66    println!(
67        "    {} LZ4            {:.1}x  ({})",
68        "→".dimmed(),
69        if report.lz4_ratio > 0.0 {
70            1.0 / report.lz4_ratio
71        } else {
72            0.0
73        },
74        HumanBytes(report.estimated_lz4_size)
75            .to_string()
76            .bright_black()
77    );
78    println!(
79        "    {} Zstd (lvl 3)   {:.1}x  ({})",
80        "→".dimmed(),
81        if report.zstd_ratio > 0.0 {
82            1.0 / report.zstd_ratio
83        } else {
84            0.0
85        },
86        HumanBytes(report.estimated_zstd_size)
87            .to_string()
88            .bright_black()
89    );
90
91    println!("\n  {} Deduplication:", "→".yellow());
92    println!(
93        "    {} Fixed Dedup    {:.1}% savings",
94        "→".dimmed(),
95        report.fixed_dedup_savings_pct
96    );
97    println!(
98        "    {} CDC Dedup      {:.1}% savings",
99        "→".dimmed(),
100        report.cdc_dedup_savings_pct
101    );
102
103    println!("\n  {} Combined Best:", "→".yellow());
104    println!(
105        "    {} Zstd + CDC     {}  ({:.1}% reduction)",
106        "→".dimmed(),
107        HumanBytes(report.estimated_packed_size_zstd_cdc)
108            .to_string()
109            .green(),
110        report.overall_best_savings_pct
111    );
112
113    println!("\n  {} Recommendation:", "→".yellow());
114    // Recommendation logic...
115    if report.overall_best_savings_pct > 10.0 {
116        if report.cdc_dedup_savings_pct > 1.0 {
117            println!(
118                "    {} Use {} with {} blocks and {} algorithm",
119                "→".dimmed(),
120                "CDC packing".cyan(),
121                "zstd".magenta(),
122                "zstd".magenta()
123            );
124        } else {
125            println!(
126                "    {} Use {} with {} algorithm",
127                "→".dimmed(),
128                "standard packing".cyan(),
129                "zstd".magenta()
130            );
131        }
132    } else if report.overall_best_savings_pct > 1.0 {
133        println!(
134            "    {} Use {} with {} algorithm",
135            "→".dimmed(),
136            "standard packing".cyan(),
137            "lz4".magenta()
138        );
139    } else {
140        println!("    {} Data is mostly incompressible.", "→".dimmed());
141    }
142
143    Ok(())
144}