use crate::error::CodecError;
use crate::fastlanes;
/// Largest decimal exponent the codec will try or accept; it is also the last
/// valid index into the `POW10` and `INV_POW10` lookup tables.
const MAX_EXPONENT: u8 = 18;
/// Powers of ten indexed by exponent: `POW10[i]` is `10^i` for `i` in `0..=18`.
///
/// Used as the scale factor when encoding and as the divisor when decoding in
/// `DecodeMode::DivideByFactor`.
const POW10: [f64; 19] = [
1.0,
10.0,
100.0,
1_000.0,
10_000.0,
100_000.0,
1_000_000.0,
10_000_000.0,
100_000_000.0,
1_000_000_000.0,
10_000_000_000.0,
100_000_000_000.0,
1_000_000_000_000.0,
10_000_000_000_000.0,
100_000_000_000_000.0,
1_000_000_000_000_000.0,
10_000_000_000_000_000.0,
100_000_000_000_000_000.0,
1_000_000_000_000_000_000.0,
];
/// Negative powers of ten indexed by exponent: `INV_POW10[i]` is the `f64`
/// literal for `10^-i`, for `i` in `0..=18`.
///
/// Used as the multiplier when decoding in `DecodeMode::MultiplyInverse`.
const INV_POW10: [f64; 19] = [
1.0,
0.1,
0.01,
0.001,
0.000_1,
0.000_01,
0.000_001,
0.000_000_1,
0.000_000_01,
0.000_000_001,
0.000_000_000_1,
0.000_000_000_01,
0.000_000_000_001,
0.000_000_000_000_1,
0.000_000_000_000_01,
0.000_000_000_000_001,
0.000_000_000_000_000_1,
0.000_000_000_000_000_01,
0.000_000_000_000_000_001,
];
use crate::CODEC_SAMPLE_SIZE;
/// Encodes `values` into the ALP byte layout.
///
/// Layout (all integers little-endian):
///
/// | bytes | content                                                    |
/// |-------|------------------------------------------------------------|
/// | 0..4  | `u32` number of values                                     |
/// | 4     | encode exponent                                            |
/// | 5     | decode exponent                                            |
/// | 6     | decode mode (`0` = multiply by inverse, `1` = divide)      |
/// | 7..11 | `u32` number of exceptions                                 |
/// | ...   | one 12-byte record per exception: `u32` index + `u64` bits |
/// | ...   | output of `fastlanes::encode` over the integer column      |
///
/// Values that do not round-trip bit-exactly through the chosen parameters are
/// stored verbatim as exceptions, with a `0` placeholder in the integer column.
///
/// # Panics
///
/// Panics if `values.len()` exceeds `u32::MAX`, because the count and the
/// exception indices are stored as `u32` and would otherwise be truncated
/// silently, producing a stream that cannot be decoded.
pub fn encode(values: &[f64]) -> Vec<u8> {
    const HEADER_SIZE: usize = 11;
    const EXCEPTION_SIZE: usize = 12;

    assert!(
        values.len() <= u32::MAX as usize,
        "ALP encode: {} values do not fit the u32 count field",
        values.len()
    );
    // Lossless thanks to the assertion above; the same bound covers `i as u32` below.
    let count = values.len() as u32;

    if values.is_empty() {
        // Count, both exponents, mode and exception count are all zero.
        return vec![0u8; HEADER_SIZE];
    }

    let params = find_best_params(values);
    let factor = POW10[params.encode_exp as usize];

    let mut encoded_ints = Vec::with_capacity(values.len());
    let mut exceptions: Vec<(u32, u64)> = Vec::new();
    for (i, &val) in values.iter().enumerate() {
        match try_alp_encode(val, factor, params.decode_exp, params.mode) {
            Some(int_val) => encoded_ints.push(int_val),
            None => {
                // Keep the integer column aligned with the input; the decoder
                // overwrites this slot with the raw bits stored in the exception.
                exceptions.push((i as u32, val.to_bits()));
                encoded_ints.push(0);
            }
        }
    }

    let exception_count = exceptions.len() as u32;
    let packed_ints = fastlanes::encode(&encoded_ints);

    let mut out =
        Vec::with_capacity(HEADER_SIZE + exceptions.len() * EXCEPTION_SIZE + packed_ints.len());
    out.extend_from_slice(&count.to_le_bytes());
    out.push(params.encode_exp);
    out.push(params.decode_exp);
    out.push(match params.mode {
        DecodeMode::MultiplyInverse => 0,
        DecodeMode::DivideByFactor => 1,
    });
    out.extend_from_slice(&exception_count.to_le_bytes());
    for &(idx, bits) in &exceptions {
        out.extend_from_slice(&idx.to_le_bytes());
        out.extend_from_slice(&bits.to_le_bytes());
    }
    out.extend_from_slice(&packed_ints);
    out
}
/// Decodes a buffer produced by `encode` back into the original `f64` values.
///
/// The buffer starts with an 11-byte header (value count, encode exponent,
/// decode exponent, decode mode, exception count), followed by 12-byte
/// exception records (`u32` index + `u64` raw bits) and finally the
/// `fastlanes`-packed integer column.
///
/// # Errors
///
/// * `CodecError::Truncated` if the buffer is shorter than the header or than
///   the header plus the declared exception table.
/// * `CodecError::Corrupt` if the decode mode byte is unknown, the decode
///   exponent exceeds `MAX_EXPONENT`, the exception table size overflows,
///   the integer column fails to decode or has the wrong length, or an
///   exception record points past the end of the value column.
pub fn decode(data: &[u8]) -> Result<Vec<f64>, CodecError> {
    const HEADER_SIZE: usize = 11;
    const EXCEPTION_SIZE: usize = 12;

    if data.len() < HEADER_SIZE {
        return Err(CodecError::Truncated {
            expected: HEADER_SIZE,
            actual: data.len(),
        });
    }

    let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
    // data[4] holds the encode exponent; decoding only needs the decode exponent.
    let decode_exp = data[5];
    let mode = match data[6] {
        0 => DecodeMode::MultiplyInverse,
        1 => DecodeMode::DivideByFactor,
        m => {
            return Err(CodecError::Corrupt {
                detail: format!("invalid ALP decode mode {m}"),
            });
        }
    };
    let exception_count = u32::from_le_bytes([data[7], data[8], data[9], data[10]]) as usize;

    if count == 0 {
        return Ok(Vec::new());
    }
    if decode_exp > MAX_EXPONENT {
        return Err(CodecError::Corrupt {
            detail: format!("invalid ALP decode_exp {decode_exp}"),
        });
    }

    // Checked arithmetic: on 32-bit targets an attacker-chosen exception count
    // could otherwise wrap around and slip past the length check below.
    let exceptions_end = exception_count
        .checked_mul(EXCEPTION_SIZE)
        .and_then(|size| size.checked_add(HEADER_SIZE))
        .ok_or_else(|| CodecError::Corrupt {
            detail: format!("ALP exception count {exception_count} overflows the table size"),
        })?;
    if data.len() < exceptions_end {
        return Err(CodecError::Truncated {
            expected: exceptions_end,
            actual: data.len(),
        });
    }
    let exception_bytes = &data[HEADER_SIZE..exceptions_end];

    let encoded_ints =
        fastlanes::decode(&data[exceptions_end..]).map_err(|e| CodecError::Corrupt {
            detail: format!("ALP fastlanes decode: {e}"),
        })?;
    if encoded_ints.len() != count {
        return Err(CodecError::Corrupt {
            detail: format!(
                "ALP int count mismatch: expected {count}, got {}",
                encoded_ints.len()
            ),
        });
    }

    let mut values: Vec<f64> = encoded_ints
        .iter()
        .map(|&int_val| alp_decode_value(int_val, decode_exp, mode))
        .collect();

    // Patch the exceptions over their placeholder slots. A record that points
    // outside the column can only come from corrupt input, so report it rather
    // than dropping it silently.
    for record in exception_bytes.chunks_exact(EXCEPTION_SIZE) {
        let idx = u32::from_le_bytes([record[0], record[1], record[2], record[3]]) as usize;
        let mut raw_bits = [0u8; 8];
        raw_bits.copy_from_slice(&record[4..]);
        let slot = values.get_mut(idx).ok_or_else(|| CodecError::Corrupt {
            detail: format!("ALP exception index {idx} out of range for {count} values"),
        })?;
        *slot = f64::from_bits(u64::from_le_bytes(raw_bits));
    }

    Ok(values)
}
/// Estimates the fraction of `values` that ALP can encode without an exception.
///
/// Only the first `CODEC_SAMPLE_SIZE` values are inspected. The best parameters
/// are searched on that sample and the share of sample values that round-trip
/// through them is returned. An empty input yields `1.0`.
pub fn alp_encodability(values: &[f64]) -> f64 {
    if values.is_empty() {
        return 1.0;
    }

    let sample = &values[..values.len().min(CODEC_SAMPLE_SIZE)];
    let params = find_best_params(sample);
    let factor = POW10[params.encode_exp as usize];

    let mut hits = 0usize;
    for &v in sample {
        if try_alp_encode(v, factor, params.decode_exp, params.mode).is_some() {
            hits += 1;
        }
    }

    hits as f64 / sample.len() as f64
}
/// How an encoded integer is turned back into an `f64`.
///
/// Stored in the stream header as a single byte: `0` for `MultiplyInverse`,
/// `1` for `DivideByFactor`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DecodeMode {
// Decode as `int as f64 * INV_POW10[decode_exp]`.
MultiplyInverse,
// Decode as `int as f64 / POW10[decode_exp]`.
DivideByFactor,
}
/// Attempts to represent `value` as an integer under the given parameters.
///
/// The value is multiplied by `factor`, rounded to the nearest integer and then
/// decoded again using `decode_exp` and `mode`. The integer is returned only if
/// that decode reproduces `value` bit-for-bit; non-finite values and values
/// whose scaled form falls outside the `i64` range yield `None`.
#[inline]
fn try_alp_encode(value: f64, factor: f64, decode_exp: u8, mode: DecodeMode) -> Option<i64> {
    if !value.is_finite() {
        return None;
    }

    let scaled = value * factor;
    let (lo, hi) = (i64::MIN as f64, i64::MAX as f64);
    if scaled < lo || scaled > hi {
        return None;
    }

    let encoded = scaled.round() as i64;
    let exp = usize::from(decode_exp);
    let mantissa = encoded as f64;
    let decoded = match mode {
        DecodeMode::DivideByFactor => mantissa / POW10[exp],
        DecodeMode::MultiplyInverse => mantissa * INV_POW10[exp],
    };

    // Compare raw bits so that e.g. -0.0 and 0.0 are not treated as equal.
    if decoded.to_bits() != value.to_bits() {
        return None;
    }
    Some(encoded)
}
/// Reconstructs the `f64` for one encoded integer.
///
/// Depending on `mode`, the integer is either multiplied by
/// `INV_POW10[decode_exp]` or divided by `POW10[decode_exp]`.
#[inline]
fn alp_decode_value(encoded: i64, decode_exp: u8, mode: DecodeMode) -> f64 {
    let mantissa = encoded as f64;
    let exp = usize::from(decode_exp);
    match mode {
        DecodeMode::DivideByFactor => mantissa / POW10[exp],
        DecodeMode::MultiplyInverse => mantissa * INV_POW10[exp],
    }
}
/// Parameter set chosen by `find_best_params` and written into the stream header.
struct AlpParams {
// Index into `POW10` giving the factor values are multiplied by when encoding.
encode_exp: u8,
// Index into `POW10` / `INV_POW10` used when turning integers back into floats.
decode_exp: u8,
// Whether decoding multiplies by the inverse power or divides by the power.
mode: DecodeMode,
}
fn find_best_params(values: &[f64]) -> AlpParams {
let sample_end = values.len().min(CODEC_SAMPLE_SIZE);
let sample = &values[..sample_end];
let mut best = AlpParams {
encode_exp: 0,
decode_exp: 0,
mode: DecodeMode::MultiplyInverse,
};
let mut best_count: usize = 0;
for e in 0..=MAX_EXPONENT {
let factor = POW10[e as usize];
for mode in [DecodeMode::MultiplyInverse, DecodeMode::DivideByFactor] {
let count = sample
.iter()
.filter(|&&v| try_alp_encode(v, factor, e, mode).is_some())
.count();
if count > best_count {
best_count = count;
best = AlpParams {
encode_exp: e,
decode_exp: e,
mode,
};
}
if e > 0 {
let count_alt = sample
.iter()
.filter(|&&v| try_alp_encode(v, factor, e - 1, mode).is_some())
.count();
if count_alt > best_count {
best_count = count_alt;
best = AlpParams {
encode_exp: e,
decode_exp: e - 1,
mode,
};
}
}
if e < MAX_EXPONENT {
let count_alt = sample
.iter()
.filter(|&&v| try_alp_encode(v, factor, e + 1, mode).is_some())
.count();
if count_alt > best_count {
best_count = count_alt;
best = AlpParams {
encode_exp: e,
decode_exp: e + 1,
mode,
};
}
}
}
}
best
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes then decodes `values` and checks that the decoded length matches.
    ///
    /// The per-test comparisons iterate with `zip`, which stops at the shorter
    /// side; without this length check a decoder returning too few values would
    /// go unnoticed.
    fn roundtrip(values: &[f64]) -> Vec<f64> {
        let decoded = decode(&encode(values)).unwrap();
        assert_eq!(decoded.len(), values.len(), "decoded length mismatch");
        decoded
    }

    #[test]
    fn empty_roundtrip() {
        let encoded = encode(&[]);
        let decoded = decode(&encoded).unwrap();
        assert!(decoded.is_empty());
    }

    #[test]
    fn single_value() {
        let values = vec![23.5f64];
        let decoded = roundtrip(&values);
        assert_eq!(decoded[0].to_bits(), values[0].to_bits());
    }

    #[test]
    fn integer_values() {
        let values: Vec<f64> = (0..1000).map(|i| i as f64).collect();
        let decoded = roundtrip(&values);
        for (a, b) in values.iter().zip(decoded.iter()) {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch: {a} vs {b}");
        }
    }

    #[test]
    fn decimal_values_one_digit() {
        let values: Vec<f64> = (0..1000).map(|i| i as f64 * 0.1).collect();
        let decoded = roundtrip(&values);
        for (i, (a, b)) in values.iter().zip(decoded.iter()).enumerate() {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch at {i}: {a} vs {b}");
        }
    }

    #[test]
    fn decimal_values_two_digits() {
        let values: Vec<f64> = (0..1000).map(|i| i as f64 * 0.01).collect();
        let decoded = roundtrip(&values);
        for (i, (a, b)) in values.iter().zip(decoded.iter()).enumerate() {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch at {i}: {a} vs {b}");
        }
    }

    #[test]
    fn typical_cpu_metrics() {
        // Deterministic LCG producing percentages rounded to one decimal place.
        let mut values = Vec::with_capacity(10_000);
        let mut rng: u64 = 42;
        for _ in 0..10_000 {
            rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
            let cpu = ((rng >> 33) as f64 / (u32::MAX as f64)) * 100.0;
            let cpu = (cpu * 10.0).round() / 10.0;
            values.push(cpu);
        }
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded.len(), values.len(), "decoded length mismatch");
        for (i, (a, b)) in values.iter().zip(decoded.iter()).enumerate() {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch at {i}: {a} vs {b}");
        }
        let raw_size = values.len() * 8;
        let ratio = raw_size as f64 / encoded.len() as f64;
        assert!(
            ratio > 3.0,
            "ALP should compress CPU metrics >3x, got {ratio:.1}x"
        );
    }

    #[test]
    fn temperature_readings() {
        // Deterministic LCG producing values in [-40, 50] rounded to two decimals.
        let mut values = Vec::with_capacity(10_000);
        let mut rng: u64 = 99;
        for _ in 0..10_000 {
            rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
            let temp = ((rng >> 33) as f64 / (u32::MAX as f64)) * 90.0 - 40.0;
            let temp = (temp * 100.0).round() / 100.0;
            values.push(temp);
        }
        let decoded = roundtrip(&values);
        for (i, (a, b)) in values.iter().zip(decoded.iter()).enumerate() {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch at {i}: {a} vs {b}");
        }
    }

    #[test]
    fn exception_handling() {
        // Mix of encodable decimals and values that must go through the exception path.
        let mut values = vec![23.5, 99.9, 100.1, 50.0];
        values.push(f64::NAN);
        values.push(f64::INFINITY);
        values.push(f64::NEG_INFINITY);
        values.push(std::f64::consts::PI);
        let decoded = roundtrip(&values);
        for (i, (a, b)) in values.iter().zip(decoded.iter()).enumerate() {
            assert_eq!(
                a.to_bits(),
                b.to_bits(),
                "mismatch at {i}: {a} ({:016x}) vs {b} ({:016x})",
                a.to_bits(),
                b.to_bits()
            );
        }
    }

    #[test]
    fn all_exceptions() {
        let values: Vec<f64> = (1..100).map(|i| std::f64::consts::PI * i as f64).collect();
        let decoded = roundtrip(&values);
        for (i, (a, b)) in values.iter().zip(decoded.iter()).enumerate() {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch at {i}");
        }
    }

    #[test]
    fn encodability_check() {
        let decimal_values: Vec<f64> = (0..1000).map(|i| i as f64 * 0.1).collect();
        assert!(alp_encodability(&decimal_values) > 0.95);
        let irrational_values: Vec<f64> =
            (1..1000).map(|i| std::f64::consts::PI * i as f64).collect();
        let irrational_enc = alp_encodability(&irrational_values);
        assert!(
            alp_encodability(&decimal_values) >= irrational_enc,
            "decimal encodability should be >= irrational"
        );
    }

    #[test]
    fn better_than_gorilla_for_decimals() {
        let values: Vec<f64> = (0..10_000).map(|i| i as f64 * 0.1).collect();
        let alp_encoded = encode(&values);
        let gorilla_encoded = crate::gorilla::encode_f64(&values);
        assert!(
            alp_encoded.len() < gorilla_encoded.len(),
            "ALP ({} bytes) should beat Gorilla ({} bytes) on decimal data",
            alp_encoded.len(),
            gorilla_encoded.len()
        );
    }

    #[test]
    fn zero_values() {
        let values = vec![0.0f64; 1000];
        let decoded = roundtrip(&values);
        for (a, b) in values.iter().zip(decoded.iter()) {
            assert_eq!(a.to_bits(), b.to_bits());
        }
    }

    #[test]
    fn negative_zero() {
        // The sign of zero must survive, so comparisons are on raw bits.
        let values = vec![-0.0f64, 0.0, -0.0];
        let decoded = roundtrip(&values);
        for (i, (a, b)) in values.iter().zip(decoded.iter()).enumerate() {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch at {i}");
        }
    }

    #[test]
    fn truncated_input_errors() {
        assert!(decode(&[]).is_err());
        // Nine bytes: shorter than the 11-byte header.
        assert!(decode(&[1, 0, 0, 0, 0, 0, 0, 0, 0]).is_err());
    }

    #[test]
    fn large_dataset() {
        let mut values = Vec::with_capacity(100_000);
        let mut rng: u64 = 12345;
        for _ in 0..100_000 {
            rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
            let val = ((rng >> 33) as f64 / (u32::MAX as f64)) * 1000.0;
            let val = (val * 100.0).round() / 100.0;
            values.push(val);
        }
        let encoded = encode(&values);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded.len(), values.len());
        for (i, (a, b)) in values.iter().zip(decoded.iter()).enumerate() {
            assert_eq!(a.to_bits(), b.to_bits(), "mismatch at {i}");
        }
    }
}