1pub mod bpe {
2 use std::io::{Cursor, Seek};
5
6 pub const DEFAULT_STACK_SIZE: usize = 5000;
7
8 fn getc(file: &mut Cursor<&[u8]>) -> i32 {
9 let c;
10
11 if file.position() as usize >= file.get_ref().len() {
12 c = EOF;
13 } else {
14 c = file.get_ref()[file.position() as usize] as i32;
15 let _ = file.seek_relative(1);
16 }
17
18 c
19 }
20
21 pub fn decode(input: &[u8], stack_size: usize) -> Vec<u8> {
25 let mut input = Cursor::new(input);
26 let mut output = Vec::new();
27
28 let mut left = [0u8; 256];
29 let mut right = [0u8; 256];
30 let mut stack = vec![0u8; stack_size];
31
32
33 let mut count: u16;
34 let mut c: u16;
35
36 loop {
37 count = getc(&mut input) as u16;
38
39 if count == EOF as u16 {
40 break;
41 }
42
43 for i in 0..256 {
44 left[i as usize] = i as u8;
45 }
46
47 c = 0u16;
48 loop {
49 if count > 127 {
50 c += count - 127;
51 count = 0;
52 }
53
54 if c == 256 {
55 break;
56 }
57
58 for _ in 0..=count {
59 left[c as usize] = getc(&mut input) as u8;
60 if c != left[c as usize] as u16 {
61 right[c as usize] = getc(&mut input) as u8;
62 }
63 c += 1;
64 }
65
66 if c == 256 {
67 break;
68 }
69
70 count = getc(&mut input) as u16;
71 }
72
73 let mut size = 256 * getc(&mut input) + getc(&mut input);
74
75 let mut i = 0;
76
77 loop {
78 if i != 0 {
79 i -= 1;
80 c = stack[i as usize] as u16;
81 } else {
82 let temp = size;
83 size -= 1;
84
85 if temp == 0 {
86 break;
87 }
88
89 c = getc(&mut input) as u16;
90 }
91
92 if c == left[c as usize] as u16 {
93 output.push(c as u8);
94 } else {
95 let temp = i;
96 i += 1;
97 stack[temp as usize] = right[c as usize];
98
99 let temp = i;
100 i += 1;
101 stack[temp as usize] = left[c as usize];
102 }
103 }
104 };
105
106 output
107 }
108
109 const BLOCKSIZE: usize = 10_000;
110 const HASHSIZE: usize = 8192;
111 const _MAXCHARS: usize = 220;
112 const _THRESHOLD: usize = 3;
113 const EOF: i32 = -1;
114
115 static mut ENC_BUFFER: [u8; BLOCKSIZE] = [0u8; BLOCKSIZE];
116 static mut ENC_LEFTCODE: [u8; 256] = [0u8; 256];
117 static mut ENC_RIGHTCODE: [u8; 256] = [0u8; 256];
118 static mut ENC_COUNT: [u8; HASHSIZE] = [0u8; HASHSIZE];
119 static mut ENC_LEFT: [u8; HASHSIZE] = [0u8; HASHSIZE];
120 static mut ENC_RIGHT: [u8; HASHSIZE] = [0u8; HASHSIZE];
121 static mut ENC_SIZE: i32 = 0;
122
123
124 unsafe fn lookup(a: u8, b: u8, hs: i32) -> i32 {
125 let mut index;
126
127 index = (a as i32 ^ (b as i32) << 5) as i32 & (hs - 1 as i32);
128
129
130 while (ENC_LEFT[index as usize] as i32 != a as i32
131 || ENC_RIGHT[index as usize] as i32 != b as i32)
132 && ENC_COUNT[index as usize] as i32 != 0
133 {
134 index = (index + 1) & (hs - 1);
135 }
136
137 ENC_LEFT[index as usize] = a;
138 ENC_RIGHT[index as usize] = b;
139
140 index
141 }
142
143 unsafe fn fileread(
144 input: &mut Cursor<&[u8]>,
145 bs: i32,
146 hs: i32,
147 mc: i32
148 ) -> bool {
149 let mut index;
150 let mut used = 0i32;
151
152 for c in 0..hs {
153 ENC_COUNT[c as usize] = 0;
154 }
155
156 for c in 0..256 {
157 ENC_LEFTCODE[c as usize] = c as u8;
158 ENC_RIGHTCODE[c as usize] = 0;
159 }
160
161
162 let mut c = 0i32;
163 ENC_SIZE = 0;
164
165 while ENC_SIZE < bs && used < mc
166 && {
167 c = getc(input);
168 c != EOF
169 }
170 {
171 if ENC_SIZE > 0 {
172 index = lookup(
173 ENC_BUFFER[(ENC_SIZE - 1 as i32) as usize],
174 c as u8,
175 hs
176 );
177
178 if (ENC_COUNT[index as usize] as i32) < 255 {
179 ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_add(1);
180 }
181 }
182
183 ENC_BUFFER[ENC_SIZE as usize] = c as u8;
184 ENC_SIZE += 1;
185
186 if ENC_RIGHTCODE[c as usize] == 0 {
187 ENC_RIGHTCODE[c as usize] = 1;
188 used += 1;
189 }
190 }
191
192
193 c == EOF
194 }
195
196 unsafe fn filewrite(file: &mut Vec<u8>) {
197 let mut len;
198 let mut c = 0i32;
199
200 while c < 256 {
201 if c == ENC_LEFTCODE[c as usize] as i32 {
202 len = 1;
203 c += 1;
204
205 while len < 127 && c < 256
206 && c == ENC_LEFTCODE[c as usize] as i32
207 {
208 len += 1;
209 c += 1;
210 }
211
212 file.push((len + 127) as u8);
213 len = 0;
214
215 if c == 256 {
216 break;
217 }
218
219 } else {
220 len = 0;
221 c += 1;
222
223 while (len < 127 && c < 256
224 && c != ENC_LEFTCODE[c as usize] as i32)
225 || (len < 125 && c < 254
226 && c + 1 != ENC_LEFTCODE[(c + 1) as usize] as i32)
227 {
228 len += 1;
229 c += 1;
230 }
231
232 file.push(len as u8);
233 c -= len + 1;
234 }
235
236 for _ in 0..=len {
237 file.push(ENC_LEFTCODE[c as usize]);
238
239 if c != ENC_LEFTCODE[c as usize] as i32 {
240 file.push(ENC_RIGHTCODE[c as usize])
241 }
242
243 c += 1;
244 }
245 }
246
247 file.push((ENC_SIZE / 256) as u8);
248 file.push((ENC_SIZE % 256) as u8);
249
250 file.extend_from_slice(&ENC_BUFFER[..ENC_SIZE as usize]);
251 }
252
253 pub fn encode(input: &[u8]) -> Vec<u8> {
255 let mut input = Cursor::new(input);
256
257 let (bs, hs, mc, th) = (8192, 4096, 200, 3);
258 let mut code: i32;
259 let mut leftch = 0;
260 let mut rightch = 0;
261 let mut output = Vec::new();
262
263 unsafe {
265 let mut done = false;
266 while !done {
267 done = fileread(&mut input, bs, hs, mc);
268 code = 256;
269
270 loop {
272 code -= 1;
274
275 while code >= 0 {
276 if code == ENC_LEFTCODE[code as usize] as i32
277 && ENC_RIGHTCODE[code as usize] == 0
278 {
279 break;
280 }
281
282 code -= 1;
283 }
284
285 if code < 0 {
286 break;
287 }
288
289 let mut best = 2;
290 let mut index = 0;
291
292 while index < hs {
293 if ENC_COUNT[index as usize] as i32 > best {
294 best = ENC_COUNT[index as usize] as i32;
295 leftch = ENC_LEFT[index as usize] as i32;
296 rightch = ENC_RIGHT[index as usize] as i32;
297 }
298
299 index += 1;
300 }
301
302 if best < th {
303 break;
304 }
305
306 let oldsize = ENC_SIZE - 1;
307 let mut w = 0i32;
308 let mut r = 0i32;
309
310 while r < oldsize {
311 if ENC_BUFFER[r as usize] as i32 == leftch
312 && ENC_BUFFER[(r + 1 as i32) as usize] as i32 == rightch
313 {
314 if r > 0 {
315 index = lookup(
316 ENC_BUFFER[(w - 1) as usize],
317 leftch as u8,
318 hs
319 );
320
321 if ENC_COUNT[index as usize] as i32 > 1 {
322 ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_sub(1);
323 }
324
325 index = lookup(
326 ENC_BUFFER[(w - 1) as usize],
327 code as u8,
328 hs
329 );
330
331 if (ENC_COUNT[index as usize] as i32) < 255 {
332 ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_add(1);
333 }
334 }
335
336 if r < oldsize - 1 {
337 index = lookup(
338 rightch as u8,
339 ENC_BUFFER[(r + 2) as usize],
340 hs
341 );
342
343 if ENC_COUNT[index as usize] as i32 > 1 {
344 ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_sub(1);
345 }
346
347 index = lookup(
348 code as u8,
349 ENC_BUFFER[(r + 2) as usize],
350 hs
351 );
352
353 if (ENC_COUNT[index as usize] as i32) < 255 {
354 ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_add(1);
355 }
356 }
357
358 ENC_BUFFER[w as usize] = code as u8;
359 w += 1;
360 r += 1;
361 ENC_SIZE -= 1;
362 } else {
363 ENC_BUFFER[w as usize] = ENC_BUFFER[r as usize];
364 w += 1;
365 }
366
367 r += 1;
368 }
369
370 ENC_BUFFER[w as usize] = ENC_BUFFER[r as usize];
371 ENC_LEFTCODE[code as usize] = leftch as u8;
372 ENC_RIGHTCODE[code as usize] = rightch as u8;
373 index = lookup(leftch as u8, rightch as u8, hs);
374 ENC_COUNT[index as usize] = 1 as i32 as u8;
375 }
376
377 filewrite(&mut output);
378 }
379 }
380
381 output
382 }
383
384
385}
386
387#[cfg(test)]
388mod tests {
389 use super::*;
390 use std::fs;
391
392 #[test]
393 fn check_encode() {
394 let target = fs::read("test_files/ferris-encoded.bin").unwrap();
395 let source = fs::read("test_files/ferris.png").unwrap();
396
397 assert_eq!(target, bpe::encode(&source));
398 }
399
400 #[test]
401 fn check_decode() {
402 let target = fs::read("test_files/ferris.png").unwrap();
403 let source = fs::read("test_files/ferris-encoded.bin").unwrap();
404
405 assert_eq!(target, bpe::decode(&source, bpe::DEFAULT_STACK_SIZE));
406 }
407}