1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
//! Encoding of values of any type to/from a sequence of code words of fixed bit length.

use std::io;
pub use minimum_redundancy::DecodingResult;
use std::borrow::Borrow;
use std::iter::FusedIterator;

pub use minimum_redundancy;

mod mr;
pub use mr::*;
mod geom;
pub use geom::*;

#[derive(Default, Copy, Clone)]
pub struct U8Code {
    pub content: u8,
    pub len: u8
}

/// Decoder that decodes a value for codeword given fragment by fragment.
pub trait Decoder {
    /// Type of value.
    type Value;

    /// Type returned by decoder. Usually equals `Self::Value` or `&Self::Value`.
    type Decoded: Borrow<Self::Value>;

    /// Consumes a `fragment` of the code and returns a value if the given `fragment` finishes the valid code.
    /// Returns `DecodingResult::Invalid` if fragments given so far do not constitute a valid code.
    fn consume_checked(&mut self, fragment: u8) -> DecodingResult<Self::Decoded>;

    /// Consumes a `fragment` of the code and returns a value if the given `fragment` finishes the valid code.
    /// The result is undefined if the fragments given so far do not constitute a valid code.
    ///
    /// Default implementation just calls `self.consume_checked(fragment)`.
    #[inline(always)] fn consume(&mut self, fragment: u8) -> DecodingResult<Self::Decoded> {
        self.consume_checked(fragment)
    }
}

/// A bijection between values and codewords.
/// Codewords are sequences of fragments.
/// Each fragment occupies constant number of bits.
pub trait Coding {
    /// Type of value.
    type Value;

    /// Type of decoder. Decoder decodes a value for code given fragment by fragment.
    type Decoder<'d>: Decoder<Value=Self::Value> where Self: 'd;

    /// Type of encoder. Encoder maps values to their codes.
    type Encoder<'e> where Self: 'e;

    /// Type of codeword.
    type Codeword: Copy + Sized + Sync;

    /// Number of bits needed to store codeword fragment.
    fn bits_per_fragment(&self) -> u8;

    /// Maximum value of fragment.
    fn max_fragment_value(&self) -> u8 {
        1u8.checked_shl(self.bits_per_fragment() as u32).map_or(u8::MAX, |v| v-1)
    }

    /// Returns decoder that allows for decoding a value.
    fn decoder(&self) -> Self::Decoder<'_>;

    /// Returns a map from values to their codewords.
    fn encoder(&self) -> Self::Encoder<'_>;

    /// Returns the length of `code` in fragments.
    fn len_of(&self, code: Self::Codeword) -> u8;

    /// Returns `index`-th fragment of `code`.
    fn fragment_of(&self, code: Self::Codeword, index: u8) -> u8;

    /// Returns last `index`-th fragment of `code`.
    #[inline] fn rev_fragment_of(&self, code: Self::Codeword, index: u8) -> u8 {
        self.fragment_of(code, self.len_of(code)-index-1)
    }

    /// Returns iterator over `code` fragments.
    #[inline] fn fragments_of(&self, code: Self::Codeword) -> FragmentsIterator<'_, Self> {
        FragmentsIterator { coding: &self, code }
    }

    /// Returns whether the `code` is empty (has zero fragments).
    #[inline] fn is_code_empty(&self, code: Self::Codeword) -> bool { self.len_of(code) == 0 }

    /// Returns the first fragment of the `code`.
    #[inline] fn first_fragment_of(&self, code: Self::Codeword) -> u8 { self.fragment_of(code, 0) }

    /// Extracts and returns the first fragment of the `code` or `None` if the `code` is already empty.
    fn extract_first_fragment_of(&self, code: &mut Self::Codeword) -> Option<u8> {
        (!self.is_code_empty(*code)).then(|| {
            let result = self.first_fragment_of(*code);
            self.remove_first_fragment_of(code);
            result
        })
    }

    /// Removes the first fragment of the `code` and returns whether it is empty now.
    fn remove_first_fragment_of(&self, code: &mut Self::Codeword) -> bool;

    /// Returns code of the value `to_encode`.
    fn code_of<'e, Q>(&self, encoder: &Self::Encoder<'e>, to_encode: &Q) -> Self::Codeword where Q: Borrow<Self::Value>;

    /// Returns the length (number of fragments) of code of the value `to_encode`.
    /// (this is the same value as `code(to_encode).fragments`, but `code_len` is faster for some encoders)
    #[inline(always)] fn len_of_encoded<'e, Q>(&self, encoder: &Self::Encoder<'e>, to_encode: &Q) -> u8 where Q: Borrow<Self::Value> {
        self.len_of(self.code_of(encoder, to_encode))
    }

    /// Returns iterator over `code` fragments.
    #[inline] fn fragments_of_encoded<'e, Q>(&self, encoder: &Self::Encoder<'e>, to_encode: &Q) -> FragmentsIterator<'_, Self>
        where Q: Borrow<Self::Value>
    {
        self.fragments_of(self.code_of(encoder, to_encode))
    }
}

/// Iterator over codeword fragments.
pub struct FragmentsIterator<'c, C: Coding + ?Sized> {
    coding: &'c C,
    code: C::Codeword
}

impl<'c, C: Coding> FusedIterator for FragmentsIterator<'c, C> {}

impl<'c, C: Coding> ExactSizeIterator for FragmentsIterator<'c, C> {
    #[inline] fn len(&self) -> usize {
        self.coding.len_of(self.code) as usize
    }
} 

impl<'c, C: Coding> Iterator for FragmentsIterator<'c, C> {
    type Item = u8;

    #[inline] fn next(&mut self) -> Option<Self::Item> {
        self.coding.extract_first_fragment_of(&mut self.code)
    }

    #[inline] fn size_hint(&self) -> (usize, Option<usize>) {
        let l = self.len(); (l, Some(l))
    }
}

/// Codings that implement `SerializableCoding` can be serialized/deserialized.
pub trait SerializableCoding: Coding {
    /// Returns the number of bytes which `write` will write
    /// assuming that each call to `write_value` writes `bytes_per_value` bytes.
    fn write_bytes(&self, bytes_per_value: usize) -> usize;

    /// Writes `self` to the `output`, using `write_value` to write values.
    fn write<F>(&self, output: &mut dyn io::Write, write_value: F) -> io::Result<()>
        where F: FnMut(&mut dyn io::Write, &Self::Value) -> io::Result<()>;

    /// Reads `self` from the input using `read_value` to read values.
    fn read<F>(input: &mut dyn io::Read, read_value: F) -> io::Result<Self>
        where F: FnMut(&mut dyn io::Read) -> io::Result<Self::Value>, Self: Sized;
}

/// Coding builder.
pub trait BuildCoding<V> {
    /// Type of coding that `self` builds.
    type Coding: Coding<Value=V>;

    /// Returns the name of `self`.
    ///
    /// The name can be used by some cache systems to check
    /// if coding built by `self` is already in cache.
    fn name(&self) -> String;

    /// Build coding that uses given number of `bits_per_fragment` and is optimal for data provided by `iter`.
    /// If `bits_per_fragment` is 0, it is set automatically.
    fn build_from_iter<Iter>(&self, iter: Iter, bits_per_fragment: u8) -> Self::Coding
        where Iter: IntoIterator, Iter::Item: Borrow<<Self::Coding as Coding>::Value>;
}

// Returns `fragment_nr`-th `bits_per_fragment`-bits fragment of `bits`.
/*#[inline(always)] pub fn get_u32_fragment(bits: u32, bits_per_fragment: u8, fragment_nr: u8) -> u32 {
    bits.checked_shr(bits_per_fragment as u32 * fragment_nr as u32).map_or(0, |v| v & ((1u32 << bits_per_fragment) - 1))
}*/