hexz_cli/cmd/data/
predict.rs1use anyhow::Result;
2use hexz_ops::predict::{PredictConfig, predict};
3use indicatif::HumanBytes;
4use std::path::PathBuf;
5
6pub fn run(
7 path: PathBuf,
8 block_size: u32,
9 min_chunk: Option<u32>,
10 avg_chunk: Option<u32>,
11 max_chunk: Option<u32>,
12 json: bool,
13) -> Result<()> {
14 let config = PredictConfig {
15 path,
16 block_size: block_size as usize,
17 min_chunk,
18 avg_chunk,
19 max_chunk,
20 ..Default::default()
21 };
22
23 let report = predict(config)?;
24
25 if json {
26 println!("{}", serde_json::to_string_pretty(&report)?);
27 return Ok(());
28 }
29
30 println!("File: {}", report.file_path);
31 println!("Size: {}", HumanBytes(report.file_size));
32 println!("Block Size: {}", HumanBytes(report.block_size as u64));
33 println!("Blocks Sampled: {}", report.blocks_sampled);
34 println!();
35
36 println!(" Zero Blocks: {:.1}%", report.zero_block_pct * 100.0);
37 println!(" Mean Entropy: {:.2} bits/byte", report.mean_entropy);
38 println!(
39 " High Entropy: {:.1}% (incompressible)",
40 report.high_entropy_pct * 100.0
41 );
42 println!();
43
44 println!(
45 " LZ4: {:.1}x ({:.1}% savings) -> ~{}",
46 if report.lz4_ratio > 0.0 {
47 1.0 / report.lz4_ratio
48 } else {
49 f64::INFINITY
50 },
51 report.lz4_savings_pct,
52 HumanBytes(report.estimated_lz4_size)
53 );
54 println!(
55 " Zstd (level 3): {:.1}x ({:.1}% savings) -> ~{}",
56 if report.zstd_ratio > 0.0 {
57 1.0 / report.zstd_ratio
58 } else {
59 f64::INFINITY
60 },
61 report.zstd_savings_pct,
62 HumanBytes(report.estimated_zstd_size)
63 );
64 println!();
65
66 println!(
67 " Fixed Dedup: {:.1}% savings",
68 report.fixed_dedup_savings_pct
69 );
70 println!(
71 " CDC Dedup: {:.1}% savings ({} chunks, {} unique, min/avg/max {}/{}/{})",
72 report.cdc_dedup_savings_pct,
73 report.cdc_chunks_total,
74 report.cdc_chunks_unique,
75 HumanBytes(report.cdc_min_chunk as u64),
76 HumanBytes(report.cdc_avg_chunk as u64),
77 HumanBytes(report.cdc_max_chunk as u64),
78 );
79 println!();
80
81 println!(
82 " LZ4 + fixed: {}",
83 HumanBytes(report.estimated_packed_size_lz4_fixed)
84 );
85 println!(
86 " Zstd + CDC: {} ({:.1}% reduction)",
87 HumanBytes(report.estimated_packed_size_zstd_cdc),
88 report.overall_best_savings_pct
89 );
90 println!();
91
92 let file_path = &report.file_path;
94 if report.overall_best_savings_pct > 10.0 {
95 if report.cdc_dedup_savings_pct > 1.0 {
96 print!(
97 "Try: hexz pack output.hxz --disk {} --compression zstd",
98 file_path
99 );
100 print!(
101 " --min-chunk {} --avg-chunk {} --max-chunk {}",
102 report.cdc_min_chunk, report.cdc_avg_chunk, report.cdc_max_chunk
103 );
104 println!();
105 } else {
106 println!(
107 "Try: hexz pack output.hxz --disk {} --compression zstd",
108 file_path
109 );
110 }
111 } else if report.overall_best_savings_pct > 1.0 {
112 println!(
113 "Try: hexz pack output.hxz --disk {} --compression lz4",
114 file_path
115 );
116 } else {
117 println!("Data is mostly incompressible with minimal deduplication potential.");
118 }
119
120 Ok(())
121}