1use std::{
8 io,
9 iter::{Fuse, FusedIterator},
10};
11
12use crate::{
13 bits::{ReadBits, WriteBits},
14 encode::VarCode,
15 math::Delta,
16 vle,
17};
18
19#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
21pub struct Token<T> {
22 pub prefix: usize,
24 pub literal: T,
26}
27
28impl<T> Delta for Token<T>
29where
30 T: Delta,
31{
32 fn delta(self, base: Self) -> Self {
33 if self.prefix == base.prefix {
34 Token {
35 prefix: 0,
36 literal: self.literal.delta(base.literal),
37 }
38 } else {
39 Token {
40 prefix: self.prefix - base.prefix,
41 literal: self.literal,
42 }
43 }
44 }
45
46 fn from_delta(base: Self, delta: Self) -> Self {
47 if delta.prefix == 0 {
48 Token {
49 prefix: base.prefix,
50 literal: T::from_delta(base.literal, delta.literal),
51 }
52 } else {
53 Token {
54 prefix: base.prefix + delta.prefix,
55 literal: delta.literal,
56 }
57 }
58 }
59}
60
61impl<T> VarCode for Token<T>
62where
63 T: VarCode,
64{
65 fn var_bit_len(&self) -> usize {
66 vle::encode_bit_len(self.prefix) + self.literal.var_bit_len()
67 }
68
69 fn var_write(&self, writer: &mut WriteBits<impl io::Write>) -> io::Result<()> {
70 vle::encode(self.prefix, writer)?;
71 self.literal.var_write(writer)
72 }
73
74 fn var_read(reader: &mut ReadBits<impl io::Read>) -> io::Result<Self> {
75 let prefix = vle::decode::<usize, _>(reader)?;
76 let literal = T::var_read(reader)?;
77
78 Ok(Token { prefix, literal })
79 }
80}
81
82#[derive(Clone, Copy, PartialEq, Eq)]
83struct Entry<T> {
84 prefix: usize,
85 literal: T,
86}
87
88pub struct Encoder<T> {
90 entires: Vec<Entry<T>>,
91 prefix: usize,
92}
93
94impl<T> Default for Encoder<T> {
95 fn default() -> Self {
96 Self::new()
97 }
98}
99
100impl<T> Encoder<T> {
101 pub fn new() -> Self {
103 Encoder {
104 entires: Vec::new(),
105 prefix: 0,
106 }
107 }
108}
109
110impl<T> Encoder<T>
111where
112 T: Copy + Eq,
113{
114 fn lookup(&self, entry: Entry<T>) -> Option<usize> {
115 for i in entry.prefix..self.entires.len() {
116 if self.entires[i] == entry {
117 return Some(i + 1);
118 }
119 }
120
121 None
122 }
123
124 pub fn encode(&mut self, symbol: T) -> Option<Token<T>> {
127 let entry = Entry {
128 prefix: self.prefix,
129 literal: symbol,
130 };
131 let index = self.lookup(entry);
132
133 match index {
134 None => {
135 debug_assert!(u32::try_from(self.prefix).is_ok());
136
137 let token = Token {
138 prefix: self.prefix,
139 literal: symbol,
140 };
141 self.entires.push(entry);
142 self.prefix = 0;
143 Some(token)
144 }
145 Some(index) => {
146 self.prefix = index;
147 None
148 }
149 }
150 }
151
152 pub fn finish(self) -> Option<Token<T>> {
154 { self }.finish_mut()
155 }
156
157 fn finish_mut(&mut self) -> Option<Token<T>> {
158 if self.prefix == 0 {
159 return None;
160 }
161
162 let last = &self.entires[self.prefix - 1];
163 self.prefix = 0;
164
165 Some(Token {
166 prefix: last.prefix,
167 literal: last.literal,
168 })
169 }
170
171 pub fn stream<I>(self, input: I) -> EncodeStream<T, I::IntoIter>
174 where
175 I: IntoIterator,
176 {
177 EncodeStream {
178 encoder: self,
179 input: input.into_iter().fuse(),
180 }
181 }
182}
183
184pub struct EncodeStream<T, I> {
187 encoder: Encoder<T>,
188 input: Fuse<I>,
189}
190
191impl<T, I> Iterator for EncodeStream<T, I>
192where
193 I: Iterator<Item = T>,
194 T: Eq + Copy,
195{
196 type Item = Token<T>;
197
198 fn next(&mut self) -> Option<Token<T>> {
199 for input in self.input.by_ref() {
200 if let Some(token) = self.encoder.encode(input) {
201 return Some(token);
202 }
203 }
204
205 self.encoder.finish_mut()
206 }
207
208 fn fold<B, F>(mut self, init: B, mut f: F) -> B
209 where
210 Self: Sized,
211 F: FnMut(B, Self::Item) -> B,
212 {
213 self.input
214 .fold(init, |state, input| match self.encoder.encode(input) {
215 None => state,
216 Some(token) => f(state, token),
217 })
218 }
219}
220
221impl<T, I> FusedIterator for EncodeStream<T, I>
222where
223 I: Iterator<Item = T>,
224 T: Eq + Copy,
225{
226}
227
228pub struct Decoder<T> {
230 scratch: Vec<T>,
231 entires: Vec<(usize, usize)>,
232}
233
234impl<T> Default for Decoder<T> {
235 fn default() -> Self {
236 Self::new()
237 }
238}
239
240impl<T> Decoder<T> {
241 pub fn new() -> Self {
243 Decoder {
244 scratch: Vec::new(),
245 entires: Vec::new(),
246 }
247 }
248}
249
250#[derive(Debug)]
252pub enum DecodeError {
253 InvalidIndex,
255}
256
257impl<T> Decoder<T>
258where
259 T: Copy + Eq,
260{
261 fn decode_next_range(&mut self, token: Token<T>) -> Result<(usize, usize), DecodeError> {
262 let (prefix_start, prefix_end) = if token.prefix > 0 {
264 if token.prefix > self.entires.len() {
265 return Err(DecodeError::InvalidIndex);
266 }
267 self.entires[token.prefix - 1]
268 } else {
269 (0, 0)
270 };
271
272 debug_assert!(prefix_end >= prefix_start);
273 let prefix_len = prefix_end - prefix_start;
274
275 let element = token.literal;
276
277 let end = if self.entires.is_empty() {
278 0
279 } else {
280 let (_start, end) = *self.entires.last().unwrap();
281 end
282 };
283
284 debug_assert_eq!(end, self.scratch.len());
285
286 self.scratch.extend_from_within(prefix_start..prefix_end);
287 self.scratch.push(element);
288
289 let new_start = end;
290 let new_end = new_start + prefix_len + 1;
291
292 self.entires.push((new_start, new_end));
293
294 Ok((new_start, new_end))
295 }
296
297 pub fn decode_next_slice(&mut self, token: Token<T>) -> Result<&[T], DecodeError> {
299 let output = self.decode_next_range(token)?;
300
301 let slice = &self.scratch[output.0..output.1];
302 Ok(slice)
303 }
304}
305
306#[test]
307fn test_u16() {
308 let mut encoder = Encoder::<u16>::new();
309 let mut compressed = Vec::new();
310
311 let data = [
312 1, 1, 2, 1, 1, 2, 3, 1, 2, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 3, 1, 2,
313 1, 1, 2, 1, 1, 2, 3, 1, 2, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 3, 1, 2,
314 1, 1, 2, 1, 1, 2, 3, 1, 2, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 3, 1, 2,
315 1, 1, 2, 1, 1, 2, 3, 1, 2, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 3, 1, 2,
316 1, 1, 2, 1, 1, 2, 3, 1, 2, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 3, 1, 2,
317 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 3, 1, 1, 1, 2, 2,
318 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 3, 1,
319 1, 1, 2, 2, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 3,
320 3, 1, 1, 1, 2, 1, 3, 1, 1, 1, 2, 2, 1, 1, 3, 3,
321 ];
322
323 for symbol in data {
324 if let Some(token) = encoder.encode(symbol) {
325 compressed.push(token);
326 }
327 }
328
329 if let Some(token) = encoder.finish() {
330 compressed.push(token);
331 }
332
333 let mut decoder = Decoder::<u16>::new();
334 let mut decoded = 0;
335
336 for token in compressed {
337 let slice = decoder.decode_next_slice(token).unwrap();
338 assert_eq!(data[decoded..][..slice.len()], *slice);
339 decoded += slice.len();
340 }
341}