1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
use std::io::{Read, Error as IOError, ErrorKind};
use std::fmt::{Formatter, Display, Error as FmtError};
use std::error::Error;
/// Number of bits per byte.
const NBBY: usize = 8;
/// Number of bits in a copy item used to store the match length.
const MATCH_BITS: usize = 6;
/// Minimum match length worth encoding as a copy item (a copy item costs
/// 2 bytes, so matching fewer than 3 bytes would not save space).
const MATCH_MIN: usize = 3;
/// Maximum encodable match length: 2^MATCH_BITS lengths, biased by
/// MATCH_MIN (= 66).
const MATCH_MAX: usize = (1 << MATCH_BITS) + (MATCH_MIN - 1);
/// Mask for the back-reference offset stored in the low `16 - MATCH_BITS`
/// bits of a copy item (= 1023).
const OFFSET_MASK: usize = (1 << (16 - MATCH_BITS)) - 1;
/// Number of entries in the Lempel history table (power of two; used as a
/// mask when hashing).
const LEMPEL_SIZE: usize = 1024;

/// Streaming LZJB compressor over a borrowed source buffer.
pub struct LzjbEncoder<'a> {
    /// Uncompressed input bytes.
    src: &'a [u8],
}

impl<'a> LzjbEncoder<'a> {
    /// Creates an encoder that will compress `src`.
    ///
    /// Made `pub`: the struct itself is `pub`, but without a public
    /// constructor it could not be created outside this module (the sibling
    /// `LzjbDecoder::new` is already `pub`).
    pub fn new(src: &'a [u8]) -> LzjbEncoder<'a> {
        LzjbEncoder { src }
    }
}

impl<'a> Read for LzjbEncoder<'a> {
    /// LZJB-compress the bytes in `self.src` into `dst`.
    ///
    /// Returns `Ok(n)` where `n` is the number of compressed bytes written.
    /// If `dst` runs out of room for another 8-item group, `Ok(self.src.len())`
    /// is returned instead, mirroring the classic C `lzjb_compress`, which
    /// returns the source length to signal "did not fit / incompressible".
    fn read(&mut self, dst: &mut [u8]) -> Result<usize, IOError> {
        let mut src_i = 0; // Current index in src
        let mut dst_i = 0; // Current index in dst
        // We place 1 extra byte preceding every 8 items. Each bit in this
        // byte is a flag that corresponds to one of the 8 items that follow
        // it. If the flag is set, the item is a 2-byte copy item; if clear,
        // it is a 1-byte literal. We call this byte the copy map.
        // Index in dst of the current copy map byte.
        let mut copymap = 0;
        // The current flag bit within the copy map byte. Starts "just past
        // the last bit" so the first iteration immediately opens a group.
        let mut copymask: usize = 1 << (NBBY - 1);
        // History table: hash of a 3-byte sequence -> last src index where a
        // sequence with that hash started. Entries may be stale or collide;
        // they are validated before use.
        let mut lempel = [0usize; LEMPEL_SIZE];
        while src_i < self.src.len() {
            copymask <<= 1;
            if copymask == (1 << NBBY) {
                // All 8 flag bits used; open a new group. A full group needs
                // at most 1 map byte + 8 copy items * 2 bytes of headroom.
                // Written as an addition so it cannot underflow when
                // `dst.len() < 17` (the original subtraction wrapped).
                if dst_i + 2 * NBBY + 1 >= dst.len() {
                    // Out of room: report "incompressible" (see doc above).
                    return Ok(self.src.len());
                }
                copymask = 1;
                copymap = dst_i; // Point to our new copy map byte
                dst[dst_i] = 0; // Place the new (initially clear) copy map
                dst_i += 1;
            }
            // Addition form avoids usize underflow when the whole input is
            // shorter than MATCH_MAX (the original `src_i > len - MATCH_MAX`
            // wrapped for inputs under 66 bytes and then indexed the source
            // out of bounds below).
            if src_i + MATCH_MAX > self.src.len() {
                // Nearing the end of the data: don't bother searching for
                // matches, just emit literals.
                dst[dst_i] = self.src[src_i];
                src_i += 1;
                dst_i += 1;
                continue;
            }
            // Hash the current 3-byte sequence; it indexes the history table.
            let mut hash = ((self.src[src_i] as usize) << 16) +
                ((self.src[src_i + 1] as usize) << 8) +
                (self.src[src_i + 2] as usize);
            hash += hash >> 9;
            hash += hash >> 5;
            let hp = hash & (LEMPEL_SIZE - 1);
            // Candidate back-reference from the table (validated below).
            // `lempel[hp] <= src_i` always holds, so this cannot underflow.
            let offset = (src_i - lempel[hp]) & OFFSET_MASK;
            let cpy = src_i - offset;
            // Record this position as the most recent sighting of the hash.
            lempel[hp] = src_i;
            // Validate the candidate: it must be a distinct earlier position
            // whose next 3 bytes really match (hash collisions and stale
            // entries fail here and fall through to a literal).
            if src_i >= offset && cpy != src_i && self.src[src_i] == self.src[cpy] &&
                self.src[src_i + 1] == self.src[cpy + 1] &&
                self.src[src_i + 2] == self.src[cpy + 2] {
                // Valid match: emit a copy item.
                dst[copymap] |= copymask as u8; // Set this item's copy flag
                // Find the full match length. The 3-byte check above
                // guarantees at least MATCH_MIN; the early-exit above
                // guarantees src_i + MATCH_MAX <= src.len(), so the probes
                // below stay in bounds.
                let mut mlen = MATCH_MIN;
                while mlen < MATCH_MAX {
                    if self.src[src_i + mlen] != self.src[cpy + mlen] {
                        break;
                    }
                    mlen += 1;
                }
                // First copy byte: biased length in the high MATCH_BITS
                // bits, high bits of the offset in the rest.
                dst[dst_i] = (((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) |
                    (offset >> NBBY)) as u8;
                dst_i += 1;
                // Second copy byte: low 8 bits of the offset.
                dst[dst_i] = offset as u8;
                dst_i += 1;
                // Skip over the matched run in the input.
                src_i += mlen;
            } else {
                // No valid match: emit a literal (flag bit stays clear).
                dst[dst_i] = self.src[src_i];
                dst_i += 1;
                src_i += 1;
            }
        }
        Ok(dst_i)
    }
}

/// Streaming LZJB decompressor over a borrowed compressed buffer.
pub struct LzjbDecoder<'a> {
    /// Compressed input bytes.
    src: &'a [u8],
}

impl<'a> LzjbDecoder<'a> {
    /// Creates a decoder that will decompress `src`.
    pub fn new(src: &'a [u8]) -> LzjbDecoder<'a> {
        LzjbDecoder { src }
    }
}

/// Error returned when the compressed input is malformed or truncated.
#[derive(Debug)]
pub struct DecoderError;

impl Display for DecoderError {
    fn fmt(&self, f: &mut Formatter) -> Result<(), FmtError> {
        // Previously this wrote nothing, so `{}` on the error printed an
        // empty string; surface the message instead.
        f.write_str("Failed to decode. The data is likely corrupted.")
    }
}

impl Error for DecoderError {
    fn description(&self) -> &str {
        "Failed to decode. The data is likely corrupted."
    }
}

impl<'a> Read for LzjbDecoder<'a> {
    /// LZJB-decompress the bytes in `self.src` into `dst`, filling `dst`
    /// completely (the caller supplies a `dst` sized to the expected
    /// decompressed length).
    ///
    /// Returns `Ok(dst.len())` on success. Returns an I/O error wrapping
    /// `DecoderError` when the input is corrupt: a copy item whose offset
    /// points before the start of the output, or input that ends before
    /// `dst` is filled (the latter previously panicked on an out-of-bounds
    /// index instead of reporting an error).
    fn read(&mut self, dst: &mut [u8]) -> Result<usize, IOError> {
        // All invalid-input paths report the same error.
        fn corrupt() -> IOError {
            IOError::new(ErrorKind::Other, DecoderError)
        }
        let mut src_i = 0;
        let mut dst_i = 0;
        let mut copymap: u8 = 0;
        // Starts "just past the last bit" so the first iteration reads a map.
        let mut copymask: usize = 1 << (NBBY - 1);
        while dst_i < dst.len() {
            copymask <<= 1;
            if copymask == (1 << NBBY) {
                // Finished another 8-item group; the next input byte is the
                // new copy map.
                copymask = 1;
                copymap = *self.src.get(src_i).ok_or_else(corrupt)?;
                src_i += 1;
            }
            if (copymap & (copymask as u8)) != 0 {
                // Copy item: two bytes encoding (length, offset).
                let b0 = *self.src.get(src_i).ok_or_else(corrupt)? as usize;
                let b1 = *self.src.get(src_i + 1).ok_or_else(corrupt)? as usize;
                src_i += 2;
                let mlen = (b0 >> (NBBY - MATCH_BITS)) + MATCH_MIN;
                let offset = ((b0 << NBBY) | b1) & OFFSET_MASK;
                if dst_i < offset {
                    // Back-reference points before the output start.
                    return Err(corrupt());
                }
                let mut cpy = dst_i - offset;
                for _ in 0..mlen {
                    if dst_i >= dst.len() {
                        // Destination full; ignore the tail of the match.
                        break;
                    }
                    dst[dst_i] = dst[cpy];
                    dst_i += 1;
                    cpy += 1;
                }
            } else {
                // Literal item: copy one byte through.
                dst[dst_i] = *self.src.get(src_i).ok_or_else(corrupt)?;
                dst_i += 1;
                src_i += 1;
            }
        }
        Ok(dst.len())
    }
}