paracletics-hypercube 0.1.0

General-purpose paracletic hypercube compression toolkit.
Documentation
use std::env;
use std::error::Error;
use std::io::{self, Cursor, Read, Write};
use std::time::Instant;

use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use lz4_flex::{compress_prepend_size, decompress_size_prepended};
use paracletics_hypercube::compression::{Codec, compress_with, decompress_with};
use paracletics_hypercube::datasets::{harmonic_wave, mixed_signal, pseudo_random, pulse_train};
use paracletics_hypercube::media_samples::{
    synth_symbolic_words, synth_wav_musical_light, synth_y4m_musical_light,
};

/// Number of bytes in one mebibyte, as a float; used as the divisor when
/// turning byte counts into MiB for the throughput columns.
const MI_B: f64 = 1024.0 * 1024.0;
/// Iteration count used when the command line does not supply a usable
/// `--iterations` value.
const DEFAULT_ITERATIONS: usize = 8;

/// Every compression algorithm that takes part in the benchmark.
///
/// The first three variants are dispatched to this crate's own codecs; the
/// remaining ones are external algorithms used as reference points.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum CandidateCodec {
    Chromoharmonic,
    DeltaPulse,
    RunLength,
    Lz4,
    Gzip,
    Zstd,
}

impl CandidateCodec {
    /// All candidates, in the order they are benchmarked and printed.
    const ALL: [Self; 6] = [
        Self::Chromoharmonic,
        Self::DeltaPulse,
        Self::RunLength,
        Self::Lz4,
        Self::Gzip,
        Self::Zstd,
    ];

    /// Label shown in the `codec` column of the report tables.
    fn name(self) -> &'static str {
        match self {
            Self::Chromoharmonic => "chromoharmonic",
            Self::DeltaPulse => "delta-pulse",
            Self::RunLength => "run-length",
            Self::Lz4 => "lz4",
            Self::Gzip => "gzip-6",
            Self::Zstd => "zstd-3",
        }
    }
}

/// Measurements for one codec on one dataset, as produced by `run_one`.
#[derive(Clone, Debug)]
struct BenchRow {
    /// Codec these measurements belong to.
    codec: CandidateCodec,
    /// Length of the uncompressed payload in bytes.
    input_bytes: usize,
    /// Length of the encoded payload in bytes.
    output_bytes: usize,
    /// `output_bytes / input_bytes` (1.0 for an empty payload); lower is better.
    ratio: f64,
    /// Encode throughput in MiB of input per second, over all iterations.
    enc_mib_s: f64,
    /// Decode throughput in MiB of input per second, over all iterations.
    dec_mib_s: f64,
    /// Whether decoding reproduced the input. `run_one` returns an error on a
    /// mismatch, so rows it returns always carry `true`.
    roundtrip_ok: bool,
    /// Wall-clock seconds spent encoding, summed over all iterations.
    enc_total_s: f64,
    /// Wall-clock seconds spent decoding, summed over all iterations.
    dec_total_s: f64,
}

/// Running totals for one codec across all datasets, feeding the global summary.
#[derive(Clone, Copy, Debug, Default)]
struct Aggregate {
    /// Total input bytes processed (payload length times iterations, summed
    /// over datasets).
    input_bytes: usize,
    /// Total encoded bytes, weighted by iterations the same way as `input_bytes`.
    output_bytes: usize,
    /// Total wall-clock seconds spent encoding.
    enc_total_s: f64,
    /// Total wall-clock seconds spent decoding.
    dec_total_s: f64,
}

fn main() -> Result<(), Box<dyn Error>> {
    let iterations = parse_iterations();
    let datasets = collect_datasets();

    println!(
        "Lossless compression benchmark against external algorithms (iterations per dataset/codec: {})",
        iterations
    );
    println!("Lower ratio is better. Positive speed deltas mean faster than chromoharmonic.");
    println!();

    let mut aggregate: Vec<(CandidateCodec, Aggregate)> = CandidateCodec::ALL
        .iter()
        .copied()
        .map(|c| (c, Aggregate::default()))
        .collect();

    for (name, payload) in &datasets {
        println!("Dataset: {} ({} bytes)", name, payload.len());
        println!(
            "{:14} {:>11} {:>8} {:>12} {:>10} {:>12} {:>10} {:>12} {:>5}",
            "codec",
            "output",
            "ratio",
            "size vs ch",
            "enc MiB/s",
            "enc vs ch",
            "dec MiB/s",
            "dec vs ch",
            "ok"
        );

        let mut rows = Vec::with_capacity(CandidateCodec::ALL.len());
        for codec in CandidateCodec::ALL {
            rows.push(run_one(codec, payload, iterations)?);
        }
        let chromo = rows
            .iter()
            .find(|r| r.codec == CandidateCodec::Chromoharmonic)
            .ok_or_else(|| io::Error::other("missing chromoharmonic baseline"))?;

        for row in &rows {
            let size_vs = pct_delta(row.ratio, chromo.ratio);
            let enc_vs = pct_delta(row.enc_mib_s, chromo.enc_mib_s);
            let dec_vs = pct_delta(row.dec_mib_s, chromo.dec_mib_s);
            let ok = if row.roundtrip_ok { "yes" } else { "no" };

            println!(
                "{:14} {:>11} {:>8.3} {:>12} {:>10.2} {:>12} {:>10.2} {:>12} {:>5}",
                row.codec.name(),
                row.output_bytes,
                row.ratio,
                format_pct(size_vs),
                row.enc_mib_s,
                format_pct(enc_vs),
                row.dec_mib_s,
                format_pct(dec_vs),
                ok
            );
        }
        println!();

        for row in rows {
            if let Some((_, agg)) = aggregate.iter_mut().find(|(codec, _)| *codec == row.codec) {
                agg.input_bytes += row.input_bytes * iterations;
                agg.output_bytes += row.output_bytes * iterations;
                agg.enc_total_s += row.enc_total_s;
                agg.dec_total_s += row.dec_total_s;
            }
        }
    }

    println!("Global Summary (weighted across all datasets)");
    println!(
        "{:14} {:>8} {:>12} {:>10} {:>12} {:>10} {:>12}",
        "codec", "ratio", "size vs ch", "enc MiB/s", "enc vs ch", "dec MiB/s", "dec vs ch"
    );

    let chromo_summary = aggregate
        .iter()
        .find(|(codec, _)| *codec == CandidateCodec::Chromoharmonic)
        .map(|(_, agg)| summary_values(*agg))
        .ok_or_else(|| io::Error::other("missing chromoharmonic summary"))?;

    for (codec, agg) in aggregate {
        let (ratio, enc_mib_s, dec_mib_s) = summary_values(agg);
        println!(
            "{:14} {:>8.3} {:>12} {:>10.2} {:>12} {:>10.2} {:>12}",
            codec.name(),
            ratio,
            format_pct(pct_delta(ratio, chromo_summary.0)),
            enc_mib_s,
            format_pct(pct_delta(enc_mib_s, chromo_summary.1)),
            dec_mib_s,
            format_pct(pct_delta(dec_mib_s, chromo_summary.2))
        );
    }

    Ok(())
}

fn parse_iterations() -> usize {
    let mut args = env::args().skip(1);
    let mut iterations = DEFAULT_ITERATIONS;

    while let Some(arg) = args.next() {
        if let Some(v) = arg.strip_prefix("--iterations=") {
            if let Ok(parsed) = v.parse::<usize>() {
                iterations = parsed.max(1);
            }
            continue;
        }
        if arg == "--iterations"
            && let Some(v) = args.next()
            && let Ok(parsed) = v.parse::<usize>()
        {
            iterations = parsed.max(1);
        }
    }

    iterations
}

/// Generates the fixed set of benchmark payloads, each paired with the label
/// printed in the report.
///
/// Payloads are produced by the crate's dataset and media-sample generators,
/// in the same order in which they are later benchmarked. The seeds passed to
/// the seeded generators are hard-coded here.
fn collect_datasets() -> Vec<(&'static str, Vec<u8>)> {
    let harmonic = harmonic_wave(1024 * 1024);
    let pulses = pulse_train(1024 * 1024);
    let mixed = mixed_signal(1024 * 1024, 0xC0DE_CAFE);
    let random = pseudo_random(1024 * 1024, 0x0BAD_F00D);
    let audio = synth_wav_musical_light(44_100, 4.0);
    let video = synth_y4m_musical_light(192, 108, 75, 25);
    let words = synth_symbolic_words(256 * 1024);

    vec![
        ("harmonic-wave-1MiB", harmonic),
        ("pulse-train-1MiB", pulses),
        ("mixed-signal-1MiB", mixed),
        ("pseudo-random-1MiB", random),
        ("musical-light-audio-wav", audio),
        ("musical-light-video-y4m", video),
        ("symbolic-words-256KiB", words),
    ]
}

/// Benchmarks a single codec on a single payload.
///
/// The payload is encoded and decoded `iterations` times. Encode and decode
/// wall-clock times are accumulated separately, and every decoded buffer is
/// compared against the original payload.
///
/// # Errors
///
/// Returns any error raised by the codec while encoding or decoding, or a
/// "roundtrip mismatch" error as soon as a decoded buffer differs from
/// `payload`.
fn run_one(
    codec: CandidateCodec,
    payload: &[u8],
    iterations: usize,
) -> Result<BenchRow, Box<dyn Error>> {
    let mut encoded_len = 0usize;
    let mut encode_secs = 0.0;
    let mut decode_secs = 0.0;

    for _ in 0..iterations {
        let encode_timer = Instant::now();
        let packed = compress_candidate(codec, payload)?;
        encode_secs += encode_timer.elapsed().as_secs_f64();

        // Remember the encoded size once; later iterations only refresh it
        // while it is still zero.
        if encoded_len == 0 {
            encoded_len = packed.len();
        }

        let decode_timer = Instant::now();
        let unpacked = decompress_candidate(codec, &packed)?;
        decode_secs += decode_timer.elapsed().as_secs_f64();

        if unpacked != payload {
            let message = format!("roundtrip mismatch for {}", codec.name());
            return Err(io::Error::other(message).into());
        }
    }

    let input_bytes = payload.len();
    // An empty payload has no meaningful ratio; report a neutral 1.0.
    let ratio = match input_bytes {
        0 => 1.0,
        len => encoded_len as f64 / len as f64,
    };

    let mib_processed = (input_bytes * iterations) as f64 / MI_B;
    // Clamp the elapsed time so a zero reading cannot divide by zero.
    let throughput = |secs: f64| mib_processed / secs.max(1e-9);

    Ok(BenchRow {
        codec,
        input_bytes,
        output_bytes: encoded_len,
        ratio,
        enc_mib_s: throughput(encode_secs),
        dec_mib_s: throughput(decode_secs),
        roundtrip_ok: true,
        enc_total_s: encode_secs,
        dec_total_s: decode_secs,
    })
}

/// Compresses `input` with the selected candidate codec.
///
/// The crate's own codecs go through `compress_with`. LZ4 uses the
/// size-prepended block API, gzip runs at compression level 6 and zstd at
/// level 3.
///
/// # Errors
///
/// Returns the I/O error reported by the gzip or zstd encoder, if any.
fn compress_candidate(codec: CandidateCodec, input: &[u8]) -> Result<Vec<u8>, Box<dyn Error>> {
    match codec {
        CandidateCodec::Chromoharmonic => Ok(compress_with(Codec::Chromoharmonic, input)),
        CandidateCodec::DeltaPulse => Ok(compress_with(Codec::DeltaPulse, input)),
        CandidateCodec::RunLength => Ok(compress_with(Codec::RunLength, input)),
        CandidateCodec::Lz4 => Ok(compress_prepend_size(input)),
        CandidateCodec::Gzip => {
            let mut gz = GzEncoder::new(Vec::new(), Compression::new(6));
            gz.write_all(input)?;
            Ok(gz.finish()?)
        }
        CandidateCodec::Zstd => Ok(zstd::stream::encode_all(Cursor::new(input), 3)?),
    }
}

/// Decompresses `input`, which must have been produced by
/// `compress_candidate` with the same `codec`.
///
/// # Errors
///
/// Returns whatever error the selected decoder reports for the given bytes.
fn decompress_candidate(codec: CandidateCodec, input: &[u8]) -> Result<Vec<u8>, Box<dyn Error>> {
    match codec {
        CandidateCodec::Chromoharmonic => Ok(decompress_with(Codec::Chromoharmonic, input)?),
        CandidateCodec::DeltaPulse => Ok(decompress_with(Codec::DeltaPulse, input)?),
        CandidateCodec::RunLength => Ok(decompress_with(Codec::RunLength, input)?),
        CandidateCodec::Lz4 => Ok(decompress_size_prepended(input)?),
        CandidateCodec::Gzip => {
            let mut plain = Vec::new();
            let mut gz = GzDecoder::new(input);
            gz.read_to_end(&mut plain)?;
            Ok(plain)
        }
        CandidateCodec::Zstd => Ok(zstd::stream::decode_all(Cursor::new(input))?),
    }
}

/// Relative difference of `current` against `baseline`, in percent.
///
/// Positive when `current` exceeds a positive `baseline`. A baseline whose
/// magnitude is below `1e-12` yields `0.0` instead of dividing by (almost)
/// zero.
fn pct_delta(current: f64, baseline: f64) -> f64 {
    if baseline.abs() < 1e-12 {
        return 0.0;
    }
    let relative = (current - baseline) / baseline;
    relative * 100.0
}

/// Renders a percentage with an explicit sign, one decimal place and a
/// trailing `%`, e.g. `+12.3%`.
fn format_pct(v: f64) -> String {
    format!("{v:+.1}%")
}

fn summary_values(agg: Aggregate) -> (f64, f64, f64) {
    let ratio = if agg.input_bytes == 0 {
        1.0
    } else {
        agg.output_bytes as f64 / agg.input_bytes as f64
    };
    let total_mib = agg.input_bytes as f64 / MI_B;
    let enc_mib_s = total_mib / agg.enc_total_s.max(1e-9);
    let dec_mib_s = total_mib / agg.dec_total_s.max(1e-9);
    (ratio, enc_mib_s, dec_mib_s)
}