Skip to main content

hexz_cli/cmd/data/
predict.rs

1use anyhow::Result;
2use hexz_ops::predict::{PredictConfig, predict};
3use indicatif::HumanBytes;
4use std::path::PathBuf;
5
6pub fn run(
7    path: PathBuf,
8    block_size: u32,
9    min_chunk: Option<u32>,
10    avg_chunk: Option<u32>,
11    max_chunk: Option<u32>,
12    json: bool,
13) -> Result<()> {
14    let config = PredictConfig {
15        path,
16        block_size: block_size as usize,
17        min_chunk,
18        avg_chunk,
19        max_chunk,
20        ..Default::default()
21    };
22
23    let report = predict(config)?;
24
25    if json {
26        println!("{}", serde_json::to_string_pretty(&report)?);
27        return Ok(());
28    }
29
30    println!("File:           {}", report.file_path);
31    println!("Size:           {}", HumanBytes(report.file_size));
32    println!("Block Size:     {}", HumanBytes(report.block_size as u64));
33    println!("Blocks Sampled: {}", report.blocks_sampled);
34    println!();
35
36    println!("  Zero Blocks:    {:.1}%", report.zero_block_pct * 100.0);
37    println!("  Mean Entropy:   {:.2} bits/byte", report.mean_entropy);
38    println!(
39        "  High Entropy:   {:.1}%  (incompressible)",
40        report.high_entropy_pct * 100.0
41    );
42    println!();
43
44    println!(
45        "  LZ4:            {:.1}x  ({:.1}% savings)  -> ~{}",
46        if report.lz4_ratio > 0.0 {
47            1.0 / report.lz4_ratio
48        } else {
49            f64::INFINITY
50        },
51        report.lz4_savings_pct,
52        HumanBytes(report.estimated_lz4_size)
53    );
54    println!(
55        "  Zstd (level 3): {:.1}x  ({:.1}% savings)  -> ~{}",
56        if report.zstd_ratio > 0.0 {
57            1.0 / report.zstd_ratio
58        } else {
59            f64::INFINITY
60        },
61        report.zstd_savings_pct,
62        HumanBytes(report.estimated_zstd_size)
63    );
64    println!();
65
66    println!(
67        "  Fixed Dedup:    {:.1}% savings",
68        report.fixed_dedup_savings_pct
69    );
70    println!(
71        "  CDC Dedup:      {:.1}% savings  ({} chunks, {} unique, min/avg/max {}/{}/{})",
72        report.cdc_dedup_savings_pct,
73        report.cdc_chunks_total,
74        report.cdc_chunks_unique,
75        HumanBytes(report.cdc_min_chunk as u64),
76        HumanBytes(report.cdc_avg_chunk as u64),
77        HumanBytes(report.cdc_max_chunk as u64),
78    );
79    println!();
80
81    println!(
82        "  LZ4 + fixed:    {}",
83        HumanBytes(report.estimated_packed_size_lz4_fixed)
84    );
85    println!(
86        "  Zstd + CDC:     {}  ({:.1}% reduction)",
87        HumanBytes(report.estimated_packed_size_zstd_cdc),
88        report.overall_best_savings_pct
89    );
90    println!();
91
92    // Recommendation
93    let file_path = &report.file_path;
94    if report.overall_best_savings_pct > 10.0 {
95        if report.cdc_dedup_savings_pct > 1.0 {
96            print!(
97                "Try: hexz pack output.hxz --disk {} --compression zstd",
98                file_path
99            );
100            print!(
101                " --min-chunk {} --avg-chunk {} --max-chunk {}",
102                report.cdc_min_chunk, report.cdc_avg_chunk, report.cdc_max_chunk
103            );
104            println!();
105        } else {
106            println!(
107                "Try: hexz pack output.hxz --disk {} --compression zstd",
108                file_path
109            );
110        }
111    } else if report.overall_best_savings_pct > 1.0 {
112        println!(
113            "Try: hexz pack output.hxz --disk {} --compression lz4",
114            file_path
115        );
116    } else {
117        println!("Data is mostly incompressible with minimal deduplication potential.");
118    }
119
120    Ok(())
121}