use std::env;
use std::error::Error;
use std::io::{self, Cursor, Read, Write};
use std::time::Instant;
use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use lz4_flex::{compress_prepend_size, decompress_size_prepended};
use paracletics_hypercube::compression::{Codec, compress_with, decompress_with};
use paracletics_hypercube::datasets::{harmonic_wave, mixed_signal, pseudo_random, pulse_train};
use paracletics_hypercube::media_samples::{
synth_symbolic_words, synth_wav_musical_light, synth_y4m_musical_light,
};
/// Bytes per mebibyte as a float; byte counts are divided by this when computing MiB/s.
const MI_B: f64 = 1024.0 * 1024.0;
/// Iteration count `parse_iterations` starts from when no usable `--iterations` argument is given.
const DEFAULT_ITERATIONS: usize = 8;
/// Compression algorithms that take part in the benchmark.
///
/// `Chromoharmonic`, `DeltaPulse` and `RunLength` are dispatched to the
/// `paracletics_hypercube` codecs; `Lz4`, `Gzip` and `Zstd` are the external
/// baselines they are compared against.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum CandidateCodec {
    Chromoharmonic,
    DeltaPulse,
    RunLength,
    Lz4,
    Gzip,
    Zstd,
}

impl CandidateCodec {
    /// Every candidate, in the order they are benchmarked and printed.
    const ALL: [Self; 6] = [
        Self::Chromoharmonic,
        Self::DeltaPulse,
        Self::RunLength,
        Self::Lz4,
        Self::Gzip,
        Self::Zstd,
    ];

    /// Label shown in the `codec` column of the report tables.
    fn name(self) -> &'static str {
        match self {
            Self::Chromoharmonic => "chromoharmonic",
            Self::DeltaPulse => "delta-pulse",
            Self::RunLength => "run-length",
            Self::Lz4 => "lz4",
            Self::Gzip => "gzip-6",
            Self::Zstd => "zstd-3",
        }
    }
}
/// Measurements for one codec on one dataset, as built by `run_one`.
#[derive(Clone, Debug)]
struct BenchRow {
/// Codec these measurements belong to.
codec: CandidateCodec,
/// Length of the uncompressed payload in bytes.
input_bytes: usize,
/// Length in bytes of one compressed encoding of the payload.
output_bytes: usize,
/// `output_bytes / input_bytes`; `1.0` when the payload is empty.
ratio: f64,
/// Encode throughput: MiB of input processed per second across all iterations.
enc_mib_s: f64,
/// Decode throughput, also measured in MiB of original input per second.
dec_mib_s: f64,
/// Whether the decoded bytes matched the payload. `run_one` returns an error
/// on a mismatch, so rows it produces always carry `true`.
roundtrip_ok: bool,
/// Seconds spent encoding, summed over all iterations.
enc_total_s: f64,
/// Seconds spent decoding, summed over all iterations.
dec_total_s: f64,
}
/// Running totals for one codec across all datasets, used for the global summary.
#[derive(Clone, Copy, Debug, Default)]
struct Aggregate {
/// Input bytes processed; `main` adds each dataset's size times the iteration count.
input_bytes: usize,
/// Output bytes produced, weighted by iteration count the same way as `input_bytes`.
output_bytes: usize,
/// Total seconds spent encoding across all datasets.
enc_total_s: f64,
/// Total seconds spent decoding across all datasets.
dec_total_s: f64,
}
fn main() -> Result<(), Box<dyn Error>> {
let iterations = parse_iterations();
let datasets = collect_datasets();
println!(
"Lossless compression benchmark against external algorithms (iterations per dataset/codec: {})",
iterations
);
println!("Lower ratio is better. Positive speed deltas mean faster than chromoharmonic.");
println!();
let mut aggregate: Vec<(CandidateCodec, Aggregate)> = CandidateCodec::ALL
.iter()
.copied()
.map(|c| (c, Aggregate::default()))
.collect();
for (name, payload) in &datasets {
println!("Dataset: {} ({} bytes)", name, payload.len());
println!(
"{:14} {:>11} {:>8} {:>12} {:>10} {:>12} {:>10} {:>12} {:>5}",
"codec",
"output",
"ratio",
"size vs ch",
"enc MiB/s",
"enc vs ch",
"dec MiB/s",
"dec vs ch",
"ok"
);
let mut rows = Vec::with_capacity(CandidateCodec::ALL.len());
for codec in CandidateCodec::ALL {
rows.push(run_one(codec, payload, iterations)?);
}
let chromo = rows
.iter()
.find(|r| r.codec == CandidateCodec::Chromoharmonic)
.ok_or_else(|| io::Error::other("missing chromoharmonic baseline"))?;
for row in &rows {
let size_vs = pct_delta(row.ratio, chromo.ratio);
let enc_vs = pct_delta(row.enc_mib_s, chromo.enc_mib_s);
let dec_vs = pct_delta(row.dec_mib_s, chromo.dec_mib_s);
let ok = if row.roundtrip_ok { "yes" } else { "no" };
println!(
"{:14} {:>11} {:>8.3} {:>12} {:>10.2} {:>12} {:>10.2} {:>12} {:>5}",
row.codec.name(),
row.output_bytes,
row.ratio,
format_pct(size_vs),
row.enc_mib_s,
format_pct(enc_vs),
row.dec_mib_s,
format_pct(dec_vs),
ok
);
}
println!();
for row in rows {
if let Some((_, agg)) = aggregate.iter_mut().find(|(codec, _)| *codec == row.codec) {
agg.input_bytes += row.input_bytes * iterations;
agg.output_bytes += row.output_bytes * iterations;
agg.enc_total_s += row.enc_total_s;
agg.dec_total_s += row.dec_total_s;
}
}
}
println!("Global Summary (weighted across all datasets)");
println!(
"{:14} {:>8} {:>12} {:>10} {:>12} {:>10} {:>12}",
"codec", "ratio", "size vs ch", "enc MiB/s", "enc vs ch", "dec MiB/s", "dec vs ch"
);
let chromo_summary = aggregate
.iter()
.find(|(codec, _)| *codec == CandidateCodec::Chromoharmonic)
.map(|(_, agg)| summary_values(*agg))
.ok_or_else(|| io::Error::other("missing chromoharmonic summary"))?;
for (codec, agg) in aggregate {
let (ratio, enc_mib_s, dec_mib_s) = summary_values(agg);
println!(
"{:14} {:>8.3} {:>12} {:>10.2} {:>12} {:>10.2} {:>12}",
codec.name(),
ratio,
format_pct(pct_delta(ratio, chromo_summary.0)),
enc_mib_s,
format_pct(pct_delta(enc_mib_s, chromo_summary.1)),
dec_mib_s,
format_pct(pct_delta(dec_mib_s, chromo_summary.2))
);
}
Ok(())
}
fn parse_iterations() -> usize {
let mut args = env::args().skip(1);
let mut iterations = DEFAULT_ITERATIONS;
while let Some(arg) = args.next() {
if let Some(v) = arg.strip_prefix("--iterations=") {
if let Ok(parsed) = v.parse::<usize>() {
iterations = parsed.max(1);
}
continue;
}
if arg == "--iterations"
&& let Some(v) = args.next()
&& let Ok(parsed) = v.parse::<usize>()
{
iterations = parsed.max(1);
}
}
iterations
}
/// Builds the fixed list of named benchmark payloads.
///
/// Payloads are generated in the order they are listed, and that order is the
/// order in which datasets are benchmarked.
fn collect_datasets() -> Vec<(&'static str, Vec<u8>)> {
    // Synthetic 1 MiB signals.
    let harmonic = harmonic_wave(1024 * 1024);
    let pulses = pulse_train(1024 * 1024);
    let mixed = mixed_signal(1024 * 1024, 0xC0DE_CAFE);
    let noise = pseudo_random(1024 * 1024, 0x0BAD_F00D);

    // Synthesized media-style samples.
    let wav = synth_wav_musical_light(44_100, 4.0);
    let y4m = synth_y4m_musical_light(192, 108, 75, 25);
    let words = synth_symbolic_words(256 * 1024);

    vec![
        ("harmonic-wave-1MiB", harmonic),
        ("pulse-train-1MiB", pulses),
        ("mixed-signal-1MiB", mixed),
        ("pseudo-random-1MiB", noise),
        ("musical-light-audio-wav", wav),
        ("musical-light-video-y4m", y4m),
        ("symbolic-words-256KiB", words),
    ]
}
/// Times `iterations` compress/decompress roundtrips of `payload` with `codec`.
///
/// Encode and decode are timed separately and summed over all iterations; the
/// roundtrip comparison itself is not part of either timing. Throughput is
/// reported in MiB of original input per second, with the elapsed time floored
/// at 1e-9 s to avoid dividing by zero.
///
/// # Errors
///
/// Returns any error from the codec, or a "roundtrip mismatch" error when the
/// decoded bytes differ from `payload`.
fn run_one(
    codec: CandidateCodec,
    payload: &[u8],
    iterations: usize,
) -> Result<BenchRow, Box<dyn Error>> {
    let mut compressed_len = 0usize;
    let mut encode_secs = 0.0;
    let mut decode_secs = 0.0;

    for _ in 0..iterations {
        let timer = Instant::now();
        let encoded = compress_candidate(codec, payload)?;
        encode_secs += timer.elapsed().as_secs_f64();

        // Keep the first non-zero encoded size that is observed.
        if compressed_len == 0 {
            compressed_len = encoded.len();
        }

        let timer = Instant::now();
        let decoded = decompress_candidate(codec, &encoded)?;
        decode_secs += timer.elapsed().as_secs_f64();

        if decoded != payload {
            let message = format!("roundtrip mismatch for {}", codec.name());
            return Err(io::Error::other(message).into());
        }
    }

    let input_bytes = payload.len();
    let ratio = match input_bytes {
        0 => 1.0,
        len => compressed_len as f64 / len as f64,
    };

    let total_input_mib = (input_bytes * iterations) as f64 / MI_B;
    let throughput = |seconds: f64| total_input_mib / seconds.max(1e-9);

    Ok(BenchRow {
        codec,
        input_bytes,
        output_bytes: compressed_len,
        ratio,
        enc_mib_s: throughput(encode_secs),
        dec_mib_s: throughput(decode_secs),
        roundtrip_ok: true,
        enc_total_s: encode_secs,
        dec_total_s: decode_secs,
    })
}
/// Compresses `input` with the selected candidate codec.
///
/// Gzip runs at compression level 6 and zstd at level 3, matching the labels
/// returned by `CandidateCodec::name`.
///
/// # Errors
///
/// Returns the I/O error raised by the gzip or zstd encoder, if any.
fn compress_candidate(codec: CandidateCodec, input: &[u8]) -> Result<Vec<u8>, Box<dyn Error>> {
    match codec {
        CandidateCodec::Chromoharmonic => Ok(compress_with(Codec::Chromoharmonic, input)),
        CandidateCodec::DeltaPulse => Ok(compress_with(Codec::DeltaPulse, input)),
        CandidateCodec::RunLength => Ok(compress_with(Codec::RunLength, input)),
        CandidateCodec::Lz4 => Ok(compress_prepend_size(input)),
        CandidateCodec::Gzip => {
            let mut gz = GzEncoder::new(Vec::new(), Compression::new(6));
            gz.write_all(input)?;
            let bytes = gz.finish()?;
            Ok(bytes)
        }
        CandidateCodec::Zstd => {
            let bytes = zstd::stream::encode_all(Cursor::new(input), 3)?;
            Ok(bytes)
        }
    }
}
/// Decompresses `input`, which must have been produced by
/// `compress_candidate` with the same `codec`.
///
/// # Errors
///
/// Returns whatever error the underlying decoder reports for the given bytes.
fn decompress_candidate(codec: CandidateCodec, input: &[u8]) -> Result<Vec<u8>, Box<dyn Error>> {
    match codec {
        CandidateCodec::Chromoharmonic => Ok(decompress_with(Codec::Chromoharmonic, input)?),
        CandidateCodec::DeltaPulse => Ok(decompress_with(Codec::DeltaPulse, input)?),
        CandidateCodec::RunLength => Ok(decompress_with(Codec::RunLength, input)?),
        CandidateCodec::Lz4 => Ok(decompress_size_prepended(input)?),
        CandidateCodec::Gzip => {
            let mut plain = Vec::new();
            GzDecoder::new(input).read_to_end(&mut plain)?;
            Ok(plain)
        }
        CandidateCodec::Zstd => Ok(zstd::stream::decode_all(Cursor::new(input))?),
    }
}
/// Relative difference of `current` from `baseline`, in percent.
///
/// Returns `0.0` when the baseline's magnitude is below 1e-12, so a zero
/// baseline never causes a division by zero.
fn pct_delta(current: f64, baseline: f64) -> f64 {
    if baseline.abs() < 1e-12 {
        return 0.0;
    }
    let relative = (current - baseline) / baseline;
    relative * 100.0
}
/// Renders a percentage with an explicit sign, one decimal place and a
/// trailing `%`, e.g. `+12.3%`.
fn format_pct(v: f64) -> String {
    let mut text = format!("{:+.1}", v);
    text.push('%');
    text
}
fn summary_values(agg: Aggregate) -> (f64, f64, f64) {
let ratio = if agg.input_bytes == 0 {
1.0
} else {
agg.output_bytes as f64 / agg.input_bytes as f64
};
let total_mib = agg.input_bytes as f64 / MI_B;
let enc_mib_s = total_mib / agg.enc_total_s.max(1e-9);
let dec_mib_s = total_mib / agg.dec_total_s.max(1e-9);
(ratio, enc_mib_s, dec_mib_s)
}