hexz_cli/cmd/data/
predict.rs1use anyhow::Result;
4use hexz_ops::predict::{PredictConfig, predict};
5use indicatif::HumanBytes;
6use std::path::PathBuf;
7use colored::Colorize;
8
9pub fn run(
11 path: PathBuf,
12 block_size: u32,
13 min_chunk: Option<u32>,
14 avg_chunk: Option<u32>,
15 max_chunk: Option<u32>,
16 json: bool,
17) -> Result<()> {
18 let config = PredictConfig {
19 path,
20 block_size: block_size as usize,
21 min_chunk,
22 avg_chunk,
23 max_chunk,
24 ..Default::default()
25 };
26
27 let report = predict(&config)?;
28
29 if json {
30 println!("{}", serde_json::to_string_pretty(&report)?);
31 return Ok(());
32 }
33
34 println!("{} Prediction {}", "╭".dimmed(), report.file_path.cyan());
35 println!("{} Size {}", "│".dimmed(), HumanBytes(report.file_size).to_string().green());
36 println!("{} Block Size {}", "╰".dimmed(), HumanBytes(report.block_size as u64).to_string().bright_black());
37
38 println!("\n {} Statistics:", "→".yellow());
39 println!(" {} Zero Blocks {:.1}%", "→".dimmed(), report.zero_block_pct * 100.0);
40 println!(" {} Mean Entropy {:.2} bits/byte", "→".dimmed(), report.mean_entropy);
41 println!(" {} High Entropy {:.1}%", "→".dimmed(), report.high_entropy_pct * 100.0);
42
43 println!("\n {} Estimation:", "→".yellow());
44 println!(
45 " {} LZ4 {:.1}x ({})",
46 "→".dimmed(),
47 if report.lz4_ratio > 0.0 { 1.0 / report.lz4_ratio } else { 0.0 },
48 HumanBytes(report.estimated_lz4_size).to_string().bright_black()
49 );
50 println!(
51 " {} Zstd (lvl 3) {:.1}x ({})",
52 "→".dimmed(),
53 if report.zstd_ratio > 0.0 { 1.0 / report.zstd_ratio } else { 0.0 },
54 HumanBytes(report.estimated_zstd_size).to_string().bright_black()
55 );
56
57 println!("\n {} Deduplication:", "→".yellow());
58 println!(" {} Fixed Dedup {:.1}% savings", "→".dimmed(), report.fixed_dedup_savings_pct);
59 println!(" {} CDC Dedup {:.1}% savings", "→".dimmed(), report.cdc_dedup_savings_pct);
60
61 println!("\n {} Combined Best:", "→".yellow());
62 println!(
63 " {} Zstd + CDC {} ({:.1}% reduction)",
64 "→".dimmed(),
65 HumanBytes(report.estimated_packed_size_zstd_cdc).to_string().green(),
66 report.overall_best_savings_pct
67 );
68
69 println!("\n {} Recommendation:", "→".yellow());
70 if report.overall_best_savings_pct > 10.0 {
72 if report.cdc_dedup_savings_pct > 1.0 {
73 println!(" {} Use {} with {} blocks and {} algorithm", "→".dimmed(), "CDC packing".cyan(), "zstd".magenta(), "zstd".magenta());
74 } else {
75 println!(" {} Use {} with {} algorithm", "→".dimmed(), "standard packing".cyan(), "zstd".magenta());
76 }
77 } else if report.overall_best_savings_pct > 1.0 {
78 println!(" {} Use {} with {} algorithm", "→".dimmed(), "standard packing".cyan(), "lz4".magenta());
79 } else {
80 println!(" {} Data is mostly incompressible.", "→".dimmed());
81 }
82
83 Ok(())
84}