#![cfg_attr(docsrs, doc(cfg(feature = "mtf")))]
use crate::error::Error;
use crate::traits::{Algorithm, RawDecoder, RawEncoder, RawProgress};
#[derive(Debug, Clone, Copy, Default)]
pub struct Mtf;
impl Algorithm for Mtf {
const NAME: &'static str = "mtf";
type Encoder = Encoder;
type Decoder = Decoder;
type EncoderConfig = ();
type DecoderConfig = ();
fn encoder_with(_cfg: ()) -> Encoder {
Encoder::new()
}
fn decoder_with(_cfg: ()) -> Decoder {
Decoder::new()
}
}
#[derive(Debug, Clone)]
struct Table {
table: [u8; 256],
}
impl Table {
const fn new() -> Self {
let mut table = [0u8; 256];
let mut i = 0usize;
while i < 256 {
table[i] = i as u8;
i += 1;
}
Self { table }
}
fn reset(&mut self) {
let mut i = 0usize;
while i < 256 {
self.table[i] = i as u8;
i += 1;
}
}
fn encode_byte(&mut self, b: u8) -> u8 {
let mut carry = self.table[0];
if carry == b {
return 0;
}
self.table[0] = b;
let mut rank = 1usize;
loop {
let next = self.table[rank];
self.table[rank] = carry;
if next == b {
return rank as u8;
}
carry = next;
rank += 1;
}
}
fn decode_byte(&mut self, rank: u8) -> u8 {
let b = self.table[rank as usize];
self.move_to_front(rank as usize);
b
}
fn move_to_front(&mut self, rank: usize) {
if rank == 0 {
return;
}
let b = self.table[rank];
self.table.copy_within(0..rank, 1);
self.table[0] = b;
}
}
#[derive(Debug, Clone)]
pub struct Encoder {
table: Table,
}
impl Encoder {
pub const fn new() -> Self {
Self {
table: Table::new(),
}
}
}
impl Default for Encoder {
fn default() -> Self {
Self::new()
}
}
impl RawEncoder for Encoder {
fn raw_encode(&mut self, input: &[u8], output: &mut [u8]) -> Result<RawProgress, Error> {
let n = input.len().min(output.len());
for i in 0..n {
output[i] = self.table.encode_byte(input[i]);
}
Ok(RawProgress {
consumed: n,
written: n,
done: false,
})
}
fn raw_finish(&mut self, _output: &mut [u8]) -> Result<RawProgress, Error> {
Ok(RawProgress {
consumed: 0,
written: 0,
done: true,
})
}
fn raw_reset(&mut self) {
self.table.reset();
}
}
#[derive(Debug, Clone)]
pub struct Decoder {
table: Table,
}
impl Decoder {
pub const fn new() -> Self {
Self {
table: Table::new(),
}
}
}
impl Default for Decoder {
fn default() -> Self {
Self::new()
}
}
impl RawDecoder for Decoder {
fn raw_decode(&mut self, input: &[u8], output: &mut [u8]) -> Result<RawProgress, Error> {
let n = input.len().min(output.len());
for i in 0..n {
output[i] = self.table.decode_byte(input[i]);
}
Ok(RawProgress {
consumed: n,
written: n,
done: false,
})
}
fn raw_finish(&mut self, _output: &mut [u8]) -> Result<RawProgress, Error> {
Ok(RawProgress {
consumed: 0,
written: 0,
done: true,
})
}
fn raw_reset(&mut self) {
self.table.reset();
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::traits::{Decoder as _, Encoder as _, Status};
use alloc::vec;
use alloc::vec::Vec;
fn encode_all(data: &[u8]) -> Vec<u8> {
let mut enc = Mtf::encoder();
let mut out = vec![0u8; data.len()];
let (p, status) = enc.encode(data, &mut out).unwrap();
assert_eq!(p.consumed, data.len());
assert_eq!(p.written, data.len());
assert_eq!(status, Status::InputEmpty);
let (fp, fstatus) = enc.finish(&mut []).unwrap();
assert_eq!(fp.written, 0);
assert_eq!(fstatus, Status::StreamEnd);
out
}
fn decode_all(data: &[u8]) -> Vec<u8> {
let mut dec = Mtf::decoder();
let mut out = vec![0u8; data.len()];
let (p, _status) = dec.decode(data, &mut out).unwrap();
assert_eq!(p.consumed, data.len());
assert_eq!(p.written, data.len());
out
}
fn assert_round_trip(data: &[u8]) {
let encoded = encode_all(data);
assert_eq!(
encoded.len(),
data.len(),
"filter must be length-preserving"
);
let decoded = decode_all(&encoded);
assert_eq!(decoded, data, "round-trip must be byte-identical");
}
#[test]
fn round_trip_empty() {
assert_round_trip(&[]);
}
#[test]
fn round_trip_single_byte() {
assert_round_trip(&[0x00]);
assert_round_trip(&[0x7F]);
assert_round_trip(&[0xFF]);
}
#[test]
fn round_trip_repeated_bytes() {
assert_round_trip(&[0x41; 1000]);
assert_round_trip(&[0xFF; 257]);
}
#[test]
fn round_trip_english_text() {
let text = b"the quick brown fox jumps over the lazy dog. \
The QUICK Brown Fox Jumps Over The Lazy Dog!";
assert_round_trip(text);
}
#[test]
fn round_trip_all_byte_values() {
let data: Vec<u8> = (0..=255u8).collect();
assert_round_trip(&data);
let rev: Vec<u8> = (0..=255u8).rev().collect();
assert_round_trip(&rev);
}
#[test]
fn round_trip_pseudo_random() {
let mut state = 0x2545_F491_4F6C_DD1Du64;
let mut data = Vec::with_capacity(4096);
for _ in 0..4096 {
state ^= state << 13;
state ^= state >> 7;
state ^= state << 17;
data.push((state & 0xFF) as u8);
}
assert_round_trip(&data);
}
#[test]
fn streaming_matches_one_shot() {
let mut state = 0x9E37_79B9_7F4A_7C15u64;
let mut data = Vec::with_capacity(3000);
for _ in 0..3000 {
state ^= state << 13;
state ^= state >> 7;
state ^= state << 17;
data.push((state & 0xFF) as u8);
}
let one_shot = encode_all(&data);
for &chunk in &[1usize, 2, 3, 7, 64] {
let mut enc = Mtf::encoder();
let mut streamed = Vec::with_capacity(data.len());
let mut scratch = vec![0u8; chunk];
let mut off = 0;
while off < data.len() {
let end = (off + chunk).min(data.len());
let (p, _) = enc.encode(&data[off..end], &mut scratch).unwrap();
assert_eq!(p.consumed, end - off);
streamed.extend_from_slice(&scratch[..p.written]);
off += p.consumed;
}
assert_eq!(streamed, one_shot, "chunk size {chunk} diverged");
let mut dec = Mtf::decoder();
let mut decoded = Vec::with_capacity(data.len());
let mut doff = 0;
while doff < streamed.len() {
let end = (doff + chunk).min(streamed.len());
let (p, _) = dec.decode(&streamed[doff..end], &mut scratch).unwrap();
decoded.extend_from_slice(&scratch[..p.written]);
doff += p.consumed;
}
assert_eq!(decoded, data, "chunked decode (chunk {chunk}) diverged");
}
}
#[test]
fn repetitive_data_yields_low_bytes() {
let data = [0x42u8; 500];
let encoded = encode_all(&data);
assert_eq!(encoded[0], 0x42, "first occurrence emits the symbol's rank");
assert!(
encoded[1..].iter().all(|&b| b == 0),
"subsequent identical bytes must encode as 0"
);
let mixed: Vec<u8> = b"aaaabbbbccccddddaaaabbbb".to_vec();
let enc_mixed = encode_all(&mixed);
let zeros = enc_mixed.iter().filter(|&&b| b == 0).count();
assert!(
zeros >= mixed.len() / 2,
"locally repetitive data should be majority zeros, got {zeros}/{}",
mixed.len()
);
}
#[test]
fn length_preserving_across_sizes() {
for len in [0usize, 1, 2, 17, 256, 257, 1024] {
let data: Vec<u8> = (0..len).map(|i| (i * 31 + 7) as u8).collect();
let encoded = encode_all(&data);
assert_eq!(encoded.len(), data.len());
let decoded = decode_all(&encoded);
assert_eq!(decoded, data);
}
}
#[test]
fn reset_restores_initial_state() {
let first = b"hello world";
let second = b"different payload entirely";
let mut enc = Mtf::encoder();
let mut out = vec![0u8; 64];
let (_p, _) = enc.encode(first, &mut out).unwrap();
enc.reset();
let (p, _) = enc.encode(second, &mut out).unwrap();
let after_reset = out[..p.written].to_vec();
let fresh = encode_all(second);
assert_eq!(after_reset, fresh);
}
}