Skip to main content

dump_transform/
dump_transform.rs

//! Dump post-transform data for external compression comparison.
//!
//! Usage: cargo run --release --example dump_transform -- <input> <output>
//!
//! Writes the transformed byte stream (what the CM engine sees) to <output>.
//! Reports format, original size, transformed size, and metadata overhead on stderr.
7
8use datacortex_core::dcx::Mode;
9use datacortex_core::format::{detect_format, preprocess};
10use std::env;
11use std::fs;
12use std::process;
13
14fn main() {
15    let args: Vec<String> = env::args().collect();
16    if args.len() != 3 {
17        eprintln!("Usage: {} <input> <output>", args[0]);
18        process::exit(1);
19    }
20
21    let input_path = &args[1];
22    let output_path = &args[2];
23
24    let data = fs::read(input_path).expect("Failed to read input file");
25
26    // Detect format (try extension first, then content).
27    let format = datacortex_core::format::detect_from_extension(input_path)
28        .unwrap_or_else(|| detect_format(&data));
29
30    // Run preprocessing.
31    let (transformed, chain) = preprocess(&data, format, Mode::Balanced);
32
33    // Calculate metadata size.
34    let metadata_bytes = chain.serialize();
35    let metadata_size = metadata_bytes.len();
36
37    // Write transformed data to output.
38    fs::write(output_path, &transformed).expect("Failed to write output file");
39
40    // Report stats.
41    let name = std::path::Path::new(input_path)
42        .file_name()
43        .unwrap()
44        .to_str()
45        .unwrap();
46
47    eprintln!("=== {} ===", name);
48    eprintln!("Format:      {:?}", format);
49    eprintln!("Original:    {} bytes", data.len());
50    eprintln!("Transformed: {} bytes", transformed.len());
51    eprintln!("Metadata:    {} bytes", metadata_size);
52    eprintln!(
53        "Total (transform + meta): {} bytes",
54        transformed.len() + metadata_size
55    );
56    eprintln!(
57        "Transform ratio: {:.1}%",
58        (transformed.len() + metadata_size) as f64 / data.len() as f64 * 100.0
59    );
60
61    if transformed.len() == data.len() {
62        eprintln!("NOTE: No transform applied (output == input)");
63    }
64}