1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
use std::cmp::min;

use crate::{BitWriter, Compressor, CompressorConfig, Decompressor};
use crate::data_types::NumberLike;
use crate::errors::QCompressResult;

/// Maximum number of leading elements of the input that
/// `auto_delta_encoding_order` uses as its sample when trial-compressing.
const AUTO_DELTA_LIMIT: usize = 1000;

/// Compresses `nums` into bytes using an automatically chosen configuration.
///
/// The configuration comes from [`auto_compressor_config`], which tries
/// different configurations on a subset of the numbers and therefore adds
/// some compute cost.
/// When the desired configuration (namely the delta encoding order) is
/// already known, building the compressor directly with
/// [`Compressor::from_config`] spares that cost.
/// See [`CompressorConfig`] for information about compression levels.
pub fn auto_compress<T: NumberLike>(nums: &[T], compression_level: usize) -> Vec<u8> {
  let config = auto_compressor_config(nums, compression_level);
  Compressor::<T>::from_config(config).simple_compress(nums)
}

/// Decompresses `bytes` back into numbers using a default [`Decompressor`].
///
/// The decompression configuration currently has no fields, so nothing has
/// to be inferred and this convenience function has no compute downside.
///
/// # Errors
///
/// Propagates any error returned by `Decompressor::simple_decompress`.
pub fn auto_decompress<T: NumberLike>(bytes: &[u8]) -> QCompressResult<Vec<T>> {
  Decompressor::<T>::default().simple_decompress(bytes)
}

/// Builds a [`CompressorConfig`] for `nums` at the given `compression_level`,
/// guessing the delta encoding order automatically.
///
/// The guess is made by trying different configurations on a subset of the
/// numbers, so calling this has some compute cost. The returned config keeps
/// `compression_level` exactly as passed in.
/// See [`CompressorConfig`] for information about compression levels.
pub fn auto_compressor_config<T: NumberLike>(nums: &[T], compression_level: usize) -> CompressorConfig {
  CompressorConfig {
    compression_level,
    delta_encoding_order: auto_delta_encoding_order(nums, compression_level),
  }
}

/// Chooses a delta encoding order for `nums` by trial compression.
///
/// At most the first `AUTO_DELTA_LIMIT` numbers are used as a sample. The
/// sample is chunked once per candidate order, starting at 0 and going up to
/// 7, and the search stops at the first order whose output is not strictly
/// smaller than the best seen so far. The order with the smallest observed
/// size is returned.
///
/// The compression level used for the trial runs is capped at 6.
///
/// Returns 0 for empty input without running any trial.
///
/// # Panics
///
/// Panics if `Compressor::chunk` returns an error for the sample.
fn auto_delta_encoding_order<T: NumberLike>(
  nums: &[T],
  compression_level: usize,
) -> usize {
  // Exclusive upper bound on the delta encoding orders that get tried.
  const ORDER_LIMIT: usize = 8;
  // Highest compression level used for the trial runs.
  const MAX_TRIAL_LEVEL: usize = 6;

  // With nothing to sample there is no trend to detect, so settle on order 0
  // directly instead of depending on how `chunk` treats an empty slice.
  if nums.is_empty() {
    return 0;
  }

  let head_nums = &nums[..min(nums.len(), AUTO_DELTA_LIMIT)];
  // Loop-invariant, so it is computed once up front.
  let trial_level = min(compression_level, MAX_TRIAL_LEVEL);

  let mut best_order = 0;
  let mut best_size = usize::MAX;
  for delta_encoding_order in 0..ORDER_LIMIT {
    let config = CompressorConfig {
      delta_encoding_order,
      compression_level: trial_level,
    };
    let compressor = Compressor::<T>::from_config(config);
    let mut writer = BitWriter::default();
    compressor.chunk(head_nums, &mut writer).unwrap(); // only unreachable errors
    let size = writer.byte_size();
    if size >= best_size {
      // it's almost always monotonic, so the first non-improvement ends
      // the search
      break;
    }
    best_order = delta_encoding_order;
    best_size = size;
  }
  best_order
}

#[cfg(test)]
mod tests {
  use crate::auto::auto_delta_encoding_order;

  /// Checks the detected delta encoding order for data with no trend, a
  /// linear trend, and a quadratic trend (expected orders 0, 1, and 2).
  #[test]
  fn test_auto_delta_encoding_order() {
    // Multiplicative sequence mod 100 starting at 1: 1, 77, 29, ...
    let no_trend: Vec<i32> = std::iter::successors(Some(1_i32), |&prev| Some(prev * 77 % 100))
      .take(100)
      .collect();
    let linear_trend: Vec<i32> = (0..100).collect();
    let quadratic_trend: Vec<i32> = (0..100).map(|i| i * i).collect();

    assert_eq!(auto_delta_encoding_order(&no_trend, 3), 0);
    assert_eq!(auto_delta_encoding_order(&linear_trend, 3), 1);
    assert_eq!(auto_delta_encoding_order(&quadratic_trend, 3), 2);
  }
}