pco/ans/
spec.rs

1use crate::ans::Symbol;
2use crate::constants::{Bitlen, Weight};
3use crate::errors::{PcoError, PcoResult};
4
5// Here and in encoding/decoding, state is between [0, table_size)
6
7pub struct Spec {
8  // log base 2 of the table size
9  // e.g. the table states will be in [2^size_log, 2^(size_log + 1))
10  pub size_log: Bitlen,
11  // the ordered symbols in the table
12  pub state_symbols: Vec<Symbol>,
13  // the number of times each symbol appears in the table
14  pub symbol_weights: Vec<Weight>,
15}
16
17// We use a relatively prime (odd) number near 3/5 of the table size. In this
18// way, uncommon symbols with weight=2, 3, 4, 5 all get pretty reasonable
19// spreads (in a slightly more balanced way than e.g. 4/7 would):
20// * 2 -> [0, 0.6]
21// * 3 -> [0, 0.2, 0.6]
22// * 4 -> [0, 0.2, 0.6, 0.8]
23// * 5 -> [0, 0.2, 0.4, 0.6, 0.8]
24fn choose_stride(table_size: Weight) -> Weight {
25  let mut res = (3 * table_size) / 5;
26  if res.is_multiple_of(2) {
27    res += 1;
28  }
29  res
30}
31
32impl Spec {
33  // This needs to remain backward compatible.
34  // The general idea is to spread the symbols out as much as possible,
35  // deterministically, and ensuring each one gets as least one state.
36  // Long runs of symbols are generally bad.
37  fn spread_state_symbols(size_log: Bitlen, symbol_weights: &[Weight]) -> PcoResult<Vec<Symbol>> {
38    let table_size = symbol_weights.iter().sum::<Weight>();
39    if table_size != (1 << size_log) {
40      return Err(PcoError::corruption(format!(
41        "table size log of {} does not agree with total weight of {}",
42        size_log, table_size,
43      )));
44    }
45
46    let mut res = vec![0; table_size as usize];
47    let mut step = 0;
48    let stride = choose_stride(table_size);
49    let mod_table_size = Weight::MAX >> 1 >> (Weight::BITS as Bitlen - 1 - size_log);
50    for (symbol, &weight) in symbol_weights.iter().enumerate() {
51      for _ in 0..weight {
52        let state_idx = (stride * step) & mod_table_size;
53        res[state_idx as usize] = symbol as Symbol;
54        step += 1;
55      }
56    }
57
58    Ok(res)
59  }
60
61  pub fn from_weights(size_log: Bitlen, symbol_weights: Vec<Weight>) -> PcoResult<Self> {
62    let symbol_weights = if symbol_weights.is_empty() {
63      vec![1]
64    } else {
65      symbol_weights
66    };
67
68    let state_symbols = Self::spread_state_symbols(size_log, &symbol_weights)?;
69
70    Ok(Self {
71      size_log,
72      state_symbols,
73      symbol_weights,
74    })
75  }
76
77  pub fn table_size(&self) -> usize {
78    1 << self.size_log
79  }
80}
81
82#[cfg(test)]
83mod tests {
84  use crate::ans::spec::{Spec, Symbol};
85  use crate::constants::Weight;
86  use crate::errors::PcoResult;
87
88  fn assert_state_symbols(weights: Vec<Weight>, expected: Vec<Symbol>) -> PcoResult<()> {
89    let table_size_log = weights.iter().sum::<Weight>().ilog2();
90    let spec = Spec::from_weights(table_size_log, weights)?;
91    assert_eq!(spec.state_symbols, expected);
92    Ok(())
93  }
94
95  #[test]
96  fn ans_spec_new() -> PcoResult<()> {
97    assert_state_symbols(
98      vec![1, 1, 3, 11],
99      vec![0, 3, 2, 3, 2, 3, 3, 3, 3, 1, 3, 2, 3, 3, 3, 3],
100    )
101  }
102
103  #[test]
104  fn ans_spec_new_trivial() -> PcoResult<()> {
105    assert_state_symbols(vec![1], vec![0])?;
106    assert_state_symbols(vec![2], vec![0, 0])
107  }
108}