1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// Unlike compressor and decompressor configs, flags change the format
// of the .qco file.
// New flags may be added over time in a backward-compatible way.

use std::cmp::min;
use std::convert::{TryFrom, TryInto};

use crate::{BitReader, BitWriter, CompressorConfig};
use crate::bits;
use crate::constants::{BITS_TO_ENCODE_DELTA_ENCODING_ORDER, MAX_DELTA_ENCODING_ORDER};
use crate::errors::{QCompressError, QCompressResult};

/// Header-level settings recorded at the start of a .qco file.
///
/// The compressor derives these from the supplied `CompressorConfig` and
/// from the `q_compress` version in use.
/// Because the flags govern how the remainder of the file is encoded,
/// decompressing with mismatched flags will most likely surface as a
/// corruption error.
///
/// Constructing flags by hand is rarely necessary, since
/// `Compressor::from_config` takes care of it internally.
/// Inspecting them while decompressing can still be useful on occasion.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Flags {
  /// Selects a 5-bit encoding for prefix lengths instead of a 4-bit one.
  ///
  /// Early `q_compress` versions used 4 bits, which cannot represent
  /// lengths of 16 (= 2^4) or more; Huffman prefixes can grow as long as 23
  /// for spiky distributions at high compression levels.
  /// Later versions always set this flag to true.
  pub use_5_bit_prefix_len: bool,
  /// The number of rounds of delta encoding applied during compression.
  ///
  /// Stored in 3 bits, so it can express the values 0-7.
  /// See `CompressorConfig` for more details.
  pub delta_encoding_order: usize,
}

impl TryFrom<Vec<bool>> for Flags {
  type Error = QCompressError;

  /// Decodes flags from their bit representation.
  ///
  /// Layout: bit 0 is `use_5_bit_prefix_len`, and the following
  /// `BITS_TO_ENCODE_DELTA_ENCODING_ORDER` bits hold `delta_encoding_order`.
  /// Any bits after that are not understood by this version and must all be
  /// false.
  ///
  /// # Errors
  ///
  /// * An invalid-argument error if `bools` is too short to contain every
  ///   known flag (previously this case panicked on an out-of-bounds index).
  /// * A compatibility error if any bit beyond the known flags is set.
  fn try_from(bools: Vec<bool>) -> QCompressResult<Self> {
    let n_known_bits = 1 + BITS_TO_ENCODE_DELTA_ENCODING_ORDER;
    // Guard before slicing so malformed input yields an error, not a panic.
    if bools.len() < n_known_bits {
      return Err(QCompressError::invalid_argument(format!(
        "flags require at least {} bits (got {})",
        n_known_bits,
        bools.len(),
      )));
    }

    let (known_bits, unknown_bits) = bools.split_at(n_known_bits);
    let use_5_bit_prefix_len = known_bits[0];
    let delta_encoding_order = bits::bits_to_usize(&known_bits[1..]);

    // A set bit we do not recognize means the file relies on a flag this
    // version cannot honor.
    if unknown_bits.iter().any(|&bit| bit) {
      return Err(QCompressError::compatibility(
        "cannot parse flags; likely written by newer version of q_compress"
      ));
    }

    Ok(Self {
      use_5_bit_prefix_len,
      delta_encoding_order,
    })
  }
}

impl TryInto<Vec<bool>> for &Flags {
  type Error = QCompressError;

  /// Encodes the flags as bits: `use_5_bit_prefix_len` first, followed by
  /// `delta_encoding_order` in `BITS_TO_ENCODE_DELTA_ENCODING_ORDER` bits.
  ///
  /// # Errors
  ///
  /// Returns an invalid-argument error when `delta_encoding_order` is
  /// greater than `MAX_DELTA_ENCODING_ORDER`.
  fn try_into(self) -> QCompressResult<Vec<bool>> {
    let order = self.delta_encoding_order;
    if order > MAX_DELTA_ENCODING_ORDER {
      return Err(QCompressError::invalid_argument(format!(
        "delta encoding order may not exceed {} (was {})",
        MAX_DELTA_ENCODING_ORDER,
        order,
      )));
    }

    let mut flag_bits = vec![self.use_5_bit_prefix_len];
    flag_bits.extend(bits::usize_truncated_to_bits(
      order,
      BITS_TO_ENCODE_DELTA_ENCODING_ORDER,
    ));
    Ok(flag_bits)
  }
}

impl Flags {
  /// Reads flags from `reader`.
  ///
  /// Flags are laid out in bytes holding 7 flag bits followed by 1
  /// continuation bit; reading stops after the first byte whose
  /// continuation bit is false.
  ///
  /// # Errors
  ///
  /// Propagates any error from the reader (including the initial
  /// `aligned_byte_idx` call, which serves as a byte-alignment assertion)
  /// and any error from converting the collected bits into `Flags`.
  pub fn parse_from(reader: &mut BitReader) -> QCompressResult<Self> {
    reader.aligned_byte_idx()?; // assert it's byte-aligned

    let mut flag_bits = Vec::new();
    let mut has_next_byte = true;
    while has_next_byte {
      flag_bits.extend(reader.read(7)?);
      has_next_byte = reader.read_one()?;
    }

    Self::try_from(flag_bits)
  }

  /// Writes the flags to `writer`, then finishes the current byte.
  ///
  /// Each byte carries up to 7 flag bits. Whenever more flag bits remain,
  /// a true continuation bit is written after the 7 flag bits.
  ///
  /// # Errors
  ///
  /// Returns an error if the flags cannot be converted to bits.
  pub fn write(&self, writer: &mut BitWriter) -> QCompressResult<()> {
    const PAYLOAD_BITS: usize = 7;

    let flag_bits: Vec<bool> = self.try_into()?;
    let n_bits = flag_bits.len();

    // The last bit of every byte is reserved to signal whether another byte
    // of flags follows.
    for start in (0..=n_bits).step_by(PAYLOAD_BITS) {
      let end = min(start + PAYLOAD_BITS, n_bits);
      writer.write(&flag_bits[start..end]);
      if end < n_bits {
        writer.write_one(true);
      }
    }
    // NOTE(review): no explicit false continuation bit is written for the
    // final byte; presumably `finish_byte` pads it with false bits - confirm
    // against `BitWriter`.
    writer.finish_byte();
    Ok(())
  }

  /// Returns the number of bits used to encode a prefix length:
  /// 5 when `use_5_bit_prefix_len` is set, otherwise 4.
  pub fn bits_to_encode_prefix_len(&self) -> usize {
    if self.use_5_bit_prefix_len { 5 } else { 4 }
  }
}

impl From<&CompressorConfig> for Flags {
  fn from(config: &CompressorConfig) -> Self {
    Flags {
      use_5_bit_prefix_len: true,
      delta_encoding_order: config.delta_encoding_order,
    }
  }
}