hexz_cli/cmd/data/
predict.rs1use anyhow::Result;
4use colored::Colorize;
5use hexz_ops::predict::{PredictConfig, predict};
6use indicatif::HumanBytes;
7use std::path::PathBuf;
8
9pub fn run(
11 path: PathBuf,
12 block_size: u32,
13 min_chunk: Option<u32>,
14 avg_chunk: Option<u32>,
15 max_chunk: Option<u32>,
16 json: bool,
17) -> Result<()> {
18 let config = PredictConfig {
19 path,
20 block_size: block_size as usize,
21 min_chunk,
22 avg_chunk,
23 max_chunk,
24 ..Default::default()
25 };
26
27 let report = predict(&config)?;
28
29 if json {
30 println!("{}", serde_json::to_string_pretty(&report)?);
31 return Ok(());
32 }
33
34 println!("{} Prediction {}", "╭".dimmed(), report.file_path.cyan());
35 println!(
36 "{} Size {}",
37 "│".dimmed(),
38 HumanBytes(report.file_size).to_string().green()
39 );
40 println!(
41 "{} Block Size {}",
42 "╰".dimmed(),
43 HumanBytes(report.block_size as u64)
44 .to_string()
45 .bright_black()
46 );
47
48 println!("\n {} Statistics:", "→".yellow());
49 println!(
50 " {} Zero Blocks {:.1}%",
51 "→".dimmed(),
52 report.zero_block_pct * 100.0
53 );
54 println!(
55 " {} Mean Entropy {:.2} bits/byte",
56 "→".dimmed(),
57 report.mean_entropy
58 );
59 println!(
60 " {} High Entropy {:.1}%",
61 "→".dimmed(),
62 report.high_entropy_pct * 100.0
63 );
64
65 println!("\n {} Estimation:", "→".yellow());
66 println!(
67 " {} LZ4 {:.1}x ({})",
68 "→".dimmed(),
69 if report.lz4_ratio > 0.0 {
70 1.0 / report.lz4_ratio
71 } else {
72 0.0
73 },
74 HumanBytes(report.estimated_lz4_size)
75 .to_string()
76 .bright_black()
77 );
78 println!(
79 " {} Zstd (lvl 3) {:.1}x ({})",
80 "→".dimmed(),
81 if report.zstd_ratio > 0.0 {
82 1.0 / report.zstd_ratio
83 } else {
84 0.0
85 },
86 HumanBytes(report.estimated_zstd_size)
87 .to_string()
88 .bright_black()
89 );
90
91 println!("\n {} Deduplication:", "→".yellow());
92 println!(
93 " {} Fixed Dedup {:.1}% savings",
94 "→".dimmed(),
95 report.fixed_dedup_savings_pct
96 );
97 println!(
98 " {} CDC Dedup {:.1}% savings",
99 "→".dimmed(),
100 report.cdc_dedup_savings_pct
101 );
102
103 println!("\n {} Combined Best:", "→".yellow());
104 println!(
105 " {} Zstd + CDC {} ({:.1}% reduction)",
106 "→".dimmed(),
107 HumanBytes(report.estimated_packed_size_zstd_cdc)
108 .to_string()
109 .green(),
110 report.overall_best_savings_pct
111 );
112
113 println!("\n {} Recommendation:", "→".yellow());
114 if report.overall_best_savings_pct > 10.0 {
116 if report.cdc_dedup_savings_pct > 1.0 {
117 println!(
118 " {} Use {} with {} blocks and {} algorithm",
119 "→".dimmed(),
120 "CDC packing".cyan(),
121 "zstd".magenta(),
122 "zstd".magenta()
123 );
124 } else {
125 println!(
126 " {} Use {} with {} algorithm",
127 "→".dimmed(),
128 "standard packing".cyan(),
129 "zstd".magenta()
130 );
131 }
132 } else if report.overall_best_savings_pct > 1.0 {
133 println!(
134 " {} Use {} with {} algorithm",
135 "→".dimmed(),
136 "standard packing".cyan(),
137 "lz4".magenta()
138 );
139 } else {
140 println!(" {} Data is mostly incompressible.", "→".dimmed());
141 }
142
143 Ok(())
144}