bpe_rs/
lib.rs

1pub mod bpe {
2    /// Code based on 1994 Philip Gage
3
4    use std::io::{Cursor, Seek};
5    
6    pub const DEFAULT_STACK_SIZE: usize = 5000;
7
8    fn getc(file: &mut Cursor<&[u8]>) -> i32 {
9        let c;
10        
11        if file.position() as usize >= file.get_ref().len() {
12            c = EOF;
13        } else {
14            c = file.get_ref()[file.position() as usize] as i32;
15            let _ = file.seek_relative(1);
16        }
17
18        c
19    }
20
21    /// Adapted from Philip Gage's `expand` function.
22    /// 
23    /// For `stack_size`, it's recommended to use `DEFAULT_STACK_SIZE`.
24    pub fn decode(input: &[u8], stack_size: usize) -> Vec<u8> {
25        let mut input = Cursor::new(input);
26        let mut output = Vec::new();
27
28        let mut left = [0u8; 256];
29        let mut right = [0u8; 256];
30        let mut stack = vec![0u8; stack_size];
31
32
33        let mut count: u16;
34        let mut c: u16;
35        
36        loop {
37            count = getc(&mut input) as u16;
38
39            if count == EOF as u16 {
40                break;
41            }
42
43            for i in 0..256 {
44                left[i as usize] = i as u8;
45            }
46
47            c = 0u16;
48            loop {
49                if count > 127 {
50                    c += count - 127;
51                    count = 0;
52                }
53
54                if c == 256 {
55                    break;
56                }
57
58                for _ in 0..=count {         
59                    left[c as usize] = getc(&mut input) as u8;
60                    if c != left[c as usize] as u16 {
61                        right[c as usize] = getc(&mut input) as u8;
62                    }
63                    c += 1;
64                }
65
66                if c == 256 {
67                    break;
68                }
69
70                count = getc(&mut input) as u16;
71            }
72
73            let mut size = 256 * getc(&mut input) + getc(&mut input);
74
75            let mut i = 0;
76
77            loop {
78                if i != 0 {
79                    i -= 1;
80                    c = stack[i as usize] as u16;
81                } else {
82                    let temp = size;
83                    size -= 1;
84
85                    if temp == 0 {
86                        break;
87                    }
88
89                    c = getc(&mut input) as u16;
90                }
91
92                if c == left[c as usize] as u16 {
93                    output.push(c as u8);
94                } else {
95                    let temp = i;
96                    i += 1;
97                    stack[temp as usize] = right[c as usize];
98                    
99                    let temp = i;
100                    i += 1;
101                    stack[temp as usize] = left[c as usize];
102                }
103            }
104        };
105
106        output
107    }
108
109    const BLOCKSIZE: usize = 10_000;
110    const HASHSIZE: usize = 8192;
111    const _MAXCHARS: usize = 220;
112    const _THRESHOLD: usize = 3;
113    const EOF: i32 = -1;
114
115    static mut ENC_BUFFER: [u8; BLOCKSIZE] = [0u8; BLOCKSIZE];
116    static mut ENC_LEFTCODE: [u8; 256] = [0u8; 256];
117    static mut ENC_RIGHTCODE: [u8; 256] = [0u8; 256];
118    static mut ENC_COUNT: [u8; HASHSIZE] = [0u8; HASHSIZE];
119    static mut ENC_LEFT: [u8; HASHSIZE] = [0u8; HASHSIZE];
120    static mut ENC_RIGHT: [u8; HASHSIZE] = [0u8; HASHSIZE];
121    static mut ENC_SIZE: i32 = 0;
122
123
124    unsafe fn lookup(a: u8, b: u8, hs: i32) -> i32 {
125        let mut index;
126
127        index = (a as i32 ^ (b as i32) << 5) as i32 & (hs - 1 as i32);
128
129
130        while (ENC_LEFT[index as usize] as i32 != a as i32
131            || ENC_RIGHT[index as usize] as i32 != b as i32)
132            && ENC_COUNT[index as usize] as i32 != 0 
133        {
134            index = (index + 1) & (hs - 1);
135        }
136
137        ENC_LEFT[index as usize] = a;
138        ENC_RIGHT[index as usize] = b;
139
140        index
141    }
142
143    unsafe fn fileread(
144        input: &mut Cursor<&[u8]>,
145        bs: i32,
146        hs: i32,
147        mc: i32
148    ) -> bool {
149        let mut index;
150        let mut used = 0i32;
151        
152        for c in 0..hs {
153            ENC_COUNT[c as usize] = 0;
154        }
155
156        for c in 0..256 {
157            ENC_LEFTCODE[c as usize] = c as u8;
158            ENC_RIGHTCODE[c as usize] = 0;
159        }
160        
161        
162        let mut c = 0i32;
163        ENC_SIZE = 0;
164        
165        while ENC_SIZE < bs && used < mc
166            && {
167                c = getc(input);
168                c != EOF
169            }
170        {
171            if ENC_SIZE > 0 {
172                index = lookup(
173                    ENC_BUFFER[(ENC_SIZE - 1 as i32) as usize],
174                    c as u8,
175                    hs
176                );
177
178                if (ENC_COUNT[index as usize] as i32) < 255 {
179                    ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_add(1);
180                }
181            }
182
183            ENC_BUFFER[ENC_SIZE as usize] = c as u8;
184            ENC_SIZE += 1;
185
186            if ENC_RIGHTCODE[c as usize] == 0 {
187                ENC_RIGHTCODE[c as usize] = 1;
188                used += 1;
189            }
190        }
191
192
193        c == EOF
194    }
195
196    unsafe fn filewrite(file: &mut Vec<u8>) {
197        let mut len;
198        let mut c = 0i32;
199
200        while c < 256 {
201            if c == ENC_LEFTCODE[c as usize] as i32 {
202                len = 1;
203                c += 1;
204                
205                while len < 127 && c < 256
206                    && c == ENC_LEFTCODE[c as usize] as i32 
207                {
208                    len += 1;
209                    c += 1;
210                }
211                
212                file.push((len + 127) as u8);
213                len = 0;
214
215                if c == 256 {
216                    break;
217                }
218                
219            } else {
220                len = 0;
221                c += 1;
222
223                while (len < 127 && c < 256
224                    && c != ENC_LEFTCODE[c as usize] as i32)
225                    || (len < 125 && c < 254 
226                    && c + 1 != ENC_LEFTCODE[(c + 1) as usize] as i32)
227                {
228                    len += 1;
229                    c += 1;
230                }
231
232                file.push(len as u8);
233                c -= len + 1;
234            }
235
236            for _ in 0..=len {
237                file.push(ENC_LEFTCODE[c as usize]);
238
239                if c != ENC_LEFTCODE[c as usize] as i32 {
240                    file.push(ENC_RIGHTCODE[c as usize])
241                }
242
243                c += 1;
244            }
245        }
246
247        file.push((ENC_SIZE / 256) as u8);
248        file.push((ENC_SIZE % 256) as u8);
249
250        file.extend_from_slice(&ENC_BUFFER[..ENC_SIZE as usize]);
251    }
252
253    /// Adapted from Philip Gage's `compress` function.
254    pub fn encode(input: &[u8]) -> Vec<u8> {
255        let mut input = Cursor::new(input);
256
257        let (bs, hs, mc, th) = (8192, 4096, 200, 3);
258        let mut code: i32;
259        let mut leftch = 0;
260        let mut rightch = 0;
261        let mut output = Vec::new();
262        
263        // compress each data block until EOF
264        unsafe {
265            let mut done = false;
266            while !done {
267                done = fileread(&mut input, bs, hs, mc);
268                code = 256;
269                
270                // compress this block
271                loop {
272                    // get next unused char for pair code
273                    code -= 1;
274                    
275                    while code >= 0 {
276                        if code == ENC_LEFTCODE[code as usize] as i32
277                            && ENC_RIGHTCODE[code as usize] == 0
278                        {
279                            break;
280                        }
281
282                        code -= 1;
283                    }
284
285                    if code < 0 {
286                        break;
287                    }
288
289                    let mut best = 2;
290                    let mut index = 0;
291
292                    while index < hs {
293                        if ENC_COUNT[index as usize] as i32 > best {
294                            best = ENC_COUNT[index as usize] as i32;
295                            leftch = ENC_LEFT[index as usize] as i32;
296                            rightch = ENC_RIGHT[index as usize] as i32;
297                        }
298
299                        index += 1;
300                    }
301
302                    if best < th {
303                        break;
304                    }
305
306                    let oldsize = ENC_SIZE - 1;
307                    let mut w = 0i32;
308                    let mut r = 0i32;
309
310                    while r < oldsize {
311                        if ENC_BUFFER[r as usize] as i32 == leftch
312                            && ENC_BUFFER[(r + 1 as i32) as usize] as i32 == rightch
313                        {
314                            if r > 0 {
315                                index = lookup(
316                                    ENC_BUFFER[(w - 1) as usize],
317                                    leftch as u8,
318                                    hs
319                                );
320
321                                if ENC_COUNT[index as usize] as i32 > 1 {
322                                    ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_sub(1);
323                                }
324
325                                index = lookup(
326                                    ENC_BUFFER[(w - 1) as usize],
327                                    code as u8,
328                                    hs
329                                );
330
331                                if (ENC_COUNT[index as usize] as i32) < 255 {
332                                    ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_add(1);
333                                }
334                            }
335
336                            if r < oldsize - 1 {
337                                index = lookup(
338                                    rightch as u8,
339                                    ENC_BUFFER[(r + 2) as usize],
340                                    hs
341                                );
342
343                                if ENC_COUNT[index as usize] as i32 > 1 {
344                                    ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_sub(1);
345                                }
346
347                                index = lookup(
348                                    code as u8,
349                                    ENC_BUFFER[(r + 2) as usize],
350                                    hs
351                                );
352
353                                if (ENC_COUNT[index as usize] as i32) < 255 {
354                                    ENC_COUNT[index as usize] = (ENC_COUNT[index as usize]).wrapping_add(1);
355                                }
356                            }
357
358                            ENC_BUFFER[w as usize] = code as u8;
359                            w += 1;
360                            r += 1;
361                            ENC_SIZE -= 1;
362                        } else {
363                            ENC_BUFFER[w as usize] = ENC_BUFFER[r as usize];
364                            w += 1;
365                        }
366
367                        r += 1;
368                    }
369
370                    ENC_BUFFER[w as usize] = ENC_BUFFER[r as usize];
371                    ENC_LEFTCODE[code as usize] = leftch as u8;
372                    ENC_RIGHTCODE[code as usize] = rightch as u8;
373                    index = lookup(leftch as u8, rightch as u8, hs);
374                    ENC_COUNT[index as usize] = 1 as i32 as u8;
375                }
376
377                filewrite(&mut output);
378            }
379        }
380
381        output
382    }
383    
384    
385}
386
387#[cfg(test)]
388mod tests {
389    use super::*;
390    use std::fs;
391
392    #[test]
393    fn check_encode() {
394        let target = fs::read("test_files/ferris-encoded.bin").unwrap();
395        let source = fs::read("test_files/ferris.png").unwrap();
396        
397        assert_eq!(target, bpe::encode(&source));
398    }
399
400    #[test]
401    fn check_decode() {
402        let target = fs::read("test_files/ferris.png").unwrap();
403        let source = fs::read("test_files/ferris-encoded.bin").unwrap();
404
405        assert_eq!(target, bpe::decode(&source, bpe::DEFAULT_STACK_SIZE));
406    }
407}