use std::io::{self, Read};
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Run<T> {
pub value: T,
pub length: u64,
}
impl<T> Run<T> {
#[must_use]
pub fn new(value: T, length: u64) -> Self {
Self { value, length }
}
}
#[derive(Debug, Clone)]
pub struct RunLengthEncoding {
runs: Vec<Run<u64>>,
total_count: usize,
}
impl<'a> IntoIterator for &'a RunLengthEncoding {
type Item = u64;
type IntoIter = RunLengthIterator<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl RunLengthEncoding {
#[must_use]
pub fn encode(values: &[u64]) -> Self {
if values.is_empty() {
return Self {
runs: Vec::new(),
total_count: 0,
};
}
let mut runs = Vec::new();
let mut current_value = values[0];
let mut current_length = 1u64;
for &value in &values[1..] {
if value == current_value {
current_length += 1;
} else {
runs.push(Run::new(current_value, current_length));
current_value = value;
current_length = 1;
}
}
runs.push(Run::new(current_value, current_length));
Self {
runs,
total_count: values.len(),
}
}
#[must_use]
pub fn from_runs(runs: Vec<Run<u64>>) -> Self {
let total_count = runs
.iter()
.map(|r| {
#[allow(clippy::cast_possible_truncation)]
let len = r.length as usize;
len
})
.sum();
Self { runs, total_count }
}
#[must_use]
pub fn decode(&self) -> Vec<u64> {
let mut values = Vec::with_capacity(self.total_count);
for run in &self.runs {
for _ in 0..run.length {
values.push(run.value);
}
}
values
}
#[must_use]
pub fn run_count(&self) -> usize {
self.runs.len()
}
#[must_use]
pub fn total_count(&self) -> usize {
self.total_count
}
#[must_use]
pub fn runs(&self) -> &[Run<u64>] {
&self.runs
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.total_count == 0
}
#[must_use]
pub fn compression_ratio(&self) -> f64 {
if self.runs.is_empty() {
return 1.0;
}
let original_size = self.total_count * 8;
let encoded_size = self.runs.len() * 16;
if encoded_size == 0 {
return 1.0;
}
original_size as f64 / encoded_size as f64
}
#[must_use]
pub fn is_beneficial(&self) -> bool {
self.compression_ratio() > 1.0
}
#[must_use]
pub fn encoded_size(&self) -> usize {
self.runs.len() * 16
}
pub fn to_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::with_capacity(8 + self.runs.len() * 16);
bytes.extend_from_slice(&(self.runs.len() as u64).to_le_bytes());
for run in &self.runs {
bytes.extend_from_slice(&run.value.to_le_bytes());
bytes.extend_from_slice(&run.length.to_le_bytes());
}
bytes
}
pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
let mut cursor = io::Cursor::new(bytes);
let mut buf = [0u8; 8];
cursor.read_exact(&mut buf)?;
#[allow(clippy::cast_possible_truncation)]
let run_count = u64::from_le_bytes(buf) as usize;
let mut runs = Vec::with_capacity(run_count);
for _ in 0..run_count {
cursor.read_exact(&mut buf)?;
let value = u64::from_le_bytes(buf);
cursor.read_exact(&mut buf)?;
let length = u64::from_le_bytes(buf);
runs.push(Run::new(value, length));
}
Ok(Self::from_runs(runs))
}
#[must_use]
pub fn get(&self, index: usize) -> Option<u64> {
if index >= self.total_count {
return None;
}
let mut offset = 0usize;
for run in &self.runs {
#[allow(clippy::cast_possible_truncation)]
let run_end = offset + run.length as usize;
if index < run_end {
return Some(run.value);
}
offset = run_end;
}
None
}
pub fn iter(&self) -> RunLengthIterator<'_> {
RunLengthIterator {
runs: &self.runs,
run_index: 0,
within_run: 0,
}
}
}
pub struct RunLengthIterator<'a> {
runs: &'a [Run<u64>],
run_index: usize,
within_run: u64,
}
impl Iterator for RunLengthIterator<'_> {
type Item = u64;
fn next(&mut self) -> Option<Self::Item> {
while self.run_index < self.runs.len() {
let run = &self.runs[self.run_index];
if self.within_run < run.length {
self.within_run += 1;
return Some(run.value);
}
self.run_index += 1;
self.within_run = 0;
}
None
}
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining: u64 = self.runs[self.run_index..]
.iter()
.map(|r| r.length)
.sum::<u64>()
- self.within_run;
#[allow(clippy::cast_possible_truncation)]
(remaining as usize, Some(remaining as usize))
}
}
impl ExactSizeIterator for RunLengthIterator<'_> {}
#[derive(Debug, Clone)]
pub struct SignedRunLengthEncoding {
inner: RunLengthEncoding,
}
impl SignedRunLengthEncoding {
#[must_use]
pub fn encode(values: &[i64]) -> Self {
let unsigned: Vec<u64> = values.iter().map(|&v| zigzag_encode(v)).collect();
Self {
inner: RunLengthEncoding::encode(&unsigned),
}
}
#[must_use]
pub fn decode(&self) -> Vec<i64> {
self.inner.decode().into_iter().map(zigzag_decode).collect()
}
#[must_use]
pub fn compression_ratio(&self) -> f64 {
self.inner.compression_ratio()
}
#[must_use]
pub fn run_count(&self) -> usize {
self.inner.run_count()
}
pub fn to_bytes(&self) -> Vec<u8> {
self.inner.to_bytes()
}
pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
Ok(Self {
inner: RunLengthEncoding::from_bytes(bytes)?,
})
}
}
#[inline]
#[must_use]
#[allow(clippy::cast_sign_loss)]
pub fn zigzag_encode(n: i64) -> u64 {
((n << 1) ^ (n >> 63)) as u64
}
#[inline]
#[must_use]
#[allow(clippy::cast_possible_wrap)]
pub fn zigzag_decode(n: u64) -> i64 {
((n >> 1) as i64) ^ -((n & 1) as i64)
}
pub struct RunLengthAnalyzer;
impl RunLengthAnalyzer {
#[must_use]
pub fn estimate_ratio(values: &[u64]) -> f64 {
if values.is_empty() {
return 1.0;
}
let mut run_count = 1usize;
for i in 1..values.len() {
if values[i] != values[i - 1] {
run_count += 1;
}
}
let original = values.len() * 8;
let encoded = run_count * 16;
if encoded == 0 {
return 1.0;
}
original as f64 / encoded as f64
}
#[must_use]
pub fn is_beneficial(values: &[u64]) -> bool {
Self::estimate_ratio(values) > 1.0
}
#[must_use]
pub fn average_run_length(values: &[u64]) -> f64 {
if values.is_empty() {
return 0.0;
}
let mut run_count = 1usize;
for i in 1..values.len() {
if values[i] != values[i - 1] {
run_count += 1;
}
}
values.len() as f64 / run_count as f64
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_encode_decode_basic() {
let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
let encoded = RunLengthEncoding::encode(&values);
assert_eq!(encoded.run_count(), 3);
assert_eq!(encoded.total_count(), 10);
let decoded = encoded.decode();
assert_eq!(values, decoded);
}
#[test]
fn test_encode_empty() {
let values: Vec<u64> = vec![];
let encoded = RunLengthEncoding::encode(&values);
assert_eq!(encoded.run_count(), 0);
assert_eq!(encoded.total_count(), 0);
assert!(encoded.is_empty());
let decoded = encoded.decode();
assert!(decoded.is_empty());
}
#[test]
fn test_encode_single() {
let values = vec![42];
let encoded = RunLengthEncoding::encode(&values);
assert_eq!(encoded.run_count(), 1);
assert_eq!(encoded.total_count(), 1);
let decoded = encoded.decode();
assert_eq!(values, decoded);
}
#[test]
fn test_encode_all_same() {
let values = vec![7u64; 1000];
let encoded = RunLengthEncoding::encode(&values);
assert_eq!(encoded.run_count(), 1);
assert_eq!(encoded.total_count(), 1000);
let ratio = encoded.compression_ratio();
assert!(ratio > 50.0, "Expected ratio > 50, got {}", ratio);
let decoded = encoded.decode();
assert_eq!(values, decoded);
}
#[test]
fn test_encode_all_different() {
let values: Vec<u64> = (0..100).collect();
let encoded = RunLengthEncoding::encode(&values);
assert_eq!(encoded.run_count(), 100);
assert_eq!(encoded.total_count(), 100);
let ratio = encoded.compression_ratio();
assert!(ratio < 1.0, "Expected ratio < 1, got {}", ratio);
let decoded = encoded.decode();
assert_eq!(values, decoded);
}
#[test]
fn test_compression_ratio() {
let all_same = vec![1u64; 100];
let encoded = RunLengthEncoding::encode(&all_same);
assert!(encoded.compression_ratio() > 1.0);
assert!(encoded.is_beneficial());
let all_diff: Vec<u64> = (0..100).collect();
let encoded = RunLengthEncoding::encode(&all_diff);
assert!(encoded.compression_ratio() < 1.0);
assert!(!encoded.is_beneficial());
}
#[test]
fn test_serialization() {
let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
let encoded = RunLengthEncoding::encode(&values);
let bytes = encoded.to_bytes();
let decoded_encoding = RunLengthEncoding::from_bytes(&bytes).unwrap();
assert_eq!(encoded.run_count(), decoded_encoding.run_count());
assert_eq!(encoded.decode(), decoded_encoding.decode());
}
#[test]
fn test_get_index() {
let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
let encoded = RunLengthEncoding::encode(&values);
assert_eq!(encoded.get(0), Some(1));
assert_eq!(encoded.get(2), Some(1));
assert_eq!(encoded.get(3), Some(2));
assert_eq!(encoded.get(4), Some(2));
assert_eq!(encoded.get(5), Some(3));
assert_eq!(encoded.get(9), Some(3));
assert_eq!(encoded.get(10), None);
}
#[test]
fn test_iterator() {
let values = vec![1, 1, 1, 2, 2, 3];
let encoded = RunLengthEncoding::encode(&values);
let iterated: Vec<u64> = encoded.iter().collect();
assert_eq!(values, iterated);
}
#[test]
fn test_signed_integers() {
let values = vec![-5, -5, -5, 0, 0, 10, 10, 10, 10];
let encoded = SignedRunLengthEncoding::encode(&values);
assert_eq!(encoded.run_count(), 3);
let decoded = encoded.decode();
assert_eq!(values, decoded);
}
#[test]
fn test_signed_serialization() {
let values = vec![-100, -100, 0, 0, 0, 100];
let encoded = SignedRunLengthEncoding::encode(&values);
let bytes = encoded.to_bytes();
let decoded_encoding = SignedRunLengthEncoding::from_bytes(&bytes).unwrap();
assert_eq!(encoded.decode(), decoded_encoding.decode());
}
#[test]
fn test_zigzag() {
assert_eq!(zigzag_encode(0), 0);
assert_eq!(zigzag_encode(-1), 1);
assert_eq!(zigzag_encode(1), 2);
assert_eq!(zigzag_encode(-2), 3);
assert_eq!(zigzag_encode(2), 4);
for i in -1000i64..1000 {
assert_eq!(zigzag_decode(zigzag_encode(i)), i);
}
}
#[test]
fn test_analyzer_estimate() {
let all_same = vec![1u64; 100];
let ratio = RunLengthAnalyzer::estimate_ratio(&all_same);
assert!(ratio > 1.0);
assert!(RunLengthAnalyzer::is_beneficial(&all_same));
let all_diff: Vec<u64> = (0..100).collect();
let ratio = RunLengthAnalyzer::estimate_ratio(&all_diff);
assert!(ratio < 1.0);
assert!(!RunLengthAnalyzer::is_beneficial(&all_diff));
}
#[test]
fn test_analyzer_average_run_length() {
let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3]; let avg = RunLengthAnalyzer::average_run_length(&values);
assert!((avg - 3.33).abs() < 0.1);
let all_same = vec![1u64; 100];
let avg = RunLengthAnalyzer::average_run_length(&all_same);
assert!((avg - 100.0).abs() < 0.1);
}
#[test]
fn test_from_runs() {
let runs = vec![Run::new(1, 3), Run::new(2, 2), Run::new(3, 5)];
let encoded = RunLengthEncoding::from_runs(runs);
assert_eq!(encoded.run_count(), 3);
assert_eq!(encoded.total_count(), 10);
assert_eq!(encoded.decode(), vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3]);
}
}