1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the Apache License, Version 2.0. See LICENSE.txt in the project root for license information.
* This software incorporates material from third parties. See NOTICE.txt for details.
*--------------------------------------------------------------------------------------------*/
use std::io::Read;
use crate::{helpers::err_exit_code, jpeg_code};
use crate::lepton_error::ExitCode;
/// Implementation of a bit reader on top of a JPEG data stream supplied by a byte reader.
///
/// Bytes are pulled from `inner` one at a time into a 64-bit register from which
/// callers consume bits most-significant-bit first. An escaped `0xff 0x00` pair in
/// the stream is collapsed into a single `0xff` data byte, and reads past the end
/// of the stream yield zero bits while setting the `eof` flag.
pub struct BitReader<R> {
/// underlying byte source
inner: R,
/// bit register; the next unread bit is the most significant bit (bit 63)
bits: u64,
/// number of valid, not yet consumed bits currently held in `bits`
num_bits: u8,
/// count of reset markers verified so far; its low 3 bits select the next expected RSTn marker
cpos: u32,
/// byte offset in the underlying stream just past the last byte consumed
/// (an escaped 0xff 0x00 pair and a reset marker each advance it by 2)
offset: i32,
/// set once the underlying reader reported end of stream while the register was being filled
eof: bool,
/// value of `offset` before the most recent byte was loaded into the register;
/// reported as the stream position while unread bits are still buffered
prev_offset: i32,
/// the most recent data byte loaded into the register (0 for end-of-stream padding);
/// used to compute the overhang
last_byte_read: u8,
}
impl<R: Read> BitReader<R> {
pub fn new(inner: R) -> Self {
BitReader {
inner: inner,
bits: 0,
num_bits: 0,
cpos: 0,
offset: 0,
eof: false,
prev_offset: 0,
last_byte_read: 0,
}
}
#[inline(always)]
pub fn read(&mut self, bits_to_read: u8) -> std::io::Result<u16> {
if bits_to_read == 0 {
return Ok(0);
}
if self.num_bits < bits_to_read {
self.fill_register(bits_to_read)?;
}
let retval = ((self.bits >> (64 - bits_to_read)) & ((1 << bits_to_read) - 1)) as u16;
self.bits <<= bits_to_read as usize;
self.num_bits -= bits_to_read;
return Ok(retval);
}
fn fill_register(&mut self, bits_to_read: u8) -> Result<(), std::io::Error> {
while self.num_bits < bits_to_read {
// Fill with zero bits if we have reached the end.
let mut buffer = [0u8];
if self.inner.read(&mut buffer)? == 0 {
// in case of a truncated file, we treat the rest of the file as zeros, but the
// bits that were ok still get returned so that we get the partial last byte right
// the caller periodically checks for EOF to see if it should stop encoding
self.eof = true;
self.num_bits += 8;
self.prev_offset = self.offset;
self.last_byte_read = 0;
// continue since we still might need to read more 0 bits
continue;
}
// 0xff is an escape code, if the next by is zero, then it is just a normal 0
// otherwise it is a reset code, which should also be skipped
if buffer[0] == 0xff {
if self.inner.read(&mut buffer)? == 0 {
// Handle case of truncation: Since we assume that everything passed the end
// is a 0, if the file ends with 0xFF, then we have to assume that this was
// an escaped 0xff. Don't mark as eof yet, since there are still the 8 bits to read.
self.prev_offset = self.offset;
self.offset += 1; // we only have 1 byte to advance in the stream and don't want to go past EOF.
self.bits |= (0xff as u64) << (56 - self.num_bits);
self.num_bits += 8;
self.last_byte_read = 0xff;
// continue since we still might need to read more 0 bits
continue;
}
if buffer[0] == 0 {
// this was an escaped FF
self.prev_offset = self.offset;
self.offset += 2;
self.bits |= (0xff as u64) << (56 - self.num_bits);
self.num_bits += 8;
self.last_byte_read = 0xff;
} else {
// verify_reset_code should get called in all instances where there should be a reset code. If we find one that
// is not where it is supposed to be, then we would fail to roundtrip the reset code, so just fail.
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!(
"invalid reset {0:x} {1:x} code found in stream at offset {2}",
0xff, buffer[0], self.offset
),
));
}
} else {
self.prev_offset = self.offset;
self.offset += 1;
self.bits |= (buffer[0] as u64) << (56 - self.num_bits);
self.num_bits += 8;
self.last_byte_read = buffer[0];
}
}
Ok(())
}
pub fn get_stream_position(&self) -> i32 {
// if there are still bits left, then we should be referring to the previous offset
if self.num_bits > 0 {
// if we still have bits, we need to go back to the last offset
return self.prev_offset;
} else {
return self.offset;
}
}
pub fn is_eof(&mut self) -> bool {
return self.eof;
}
/// used to verify whether this image is using 1s or 0s as fill bits.
/// Returns whether the fill bit was 1 or so or unknown (None)
pub fn read_and_verify_fill_bits(&mut self, pad_bit: &mut Option<u8>) -> anyhow::Result<()> {
// if there are bits left, we need to see whether they
// are 1s or zeros.
if self.num_bits > 0 && !self.eof {
let num_bits_to_read = self.num_bits;
let actual = self.read(num_bits_to_read)?;
let all_one = (1 << num_bits_to_read) - 1;
match *pad_bit {
None => {
if actual == 0 {
*pad_bit = Some(0);
} else if actual == all_one {
*pad_bit = Some(0xff);
} else {
return err_exit_code(
ExitCode::UnsupportedJpeg,
format!(
"inconsistent pad bits num_bits={0} pattern={1:b}",
num_bits_to_read, actual
)
.as_str(),
);
}
}
Some(x) => {
// if we already saw a padding, then it should match
let expected = u16::from(x) & all_one;
if actual != expected {
return err_exit_code(ExitCode::UnsupportedJpeg, format!("padding of {0} bits should be set to 1 actual={1:b} expected={2:b}", num_bits_to_read, actual, expected).as_str());
}
}
}
}
return Ok(());
}
pub fn verify_reset_code(&mut self) -> anyhow::Result<()> {
// we reached the end of a MCU, so we need to find a reset code and the huffman codes start get padded out, but the spec
// doesn't specify whether the padding should be 1s or 0s, so we ensure that at least the file is consistant so that we
// can recode it again just by remembering the pad bit.
let mut h = [0u8; 2];
self.inner.read_exact(&mut h)?;
if h[0] != 0xff || h[1] != (jpeg_code::RST0 + (self.cpos as u8 & 7)) {
return err_exit_code(
ExitCode::UnsupportedJpeg,
format!(
"invalid reset code {0:x} {1:x} found in stream at offset {2}",
h[0], h[1], self.offset
)
.as_str(),
);
}
// start from scratch after RST
self.cpos += 1;
self.offset += 2;
self.prev_offset = self.offset;
self.bits = 0;
self.num_bits = 0;
Ok(())
}
/// Retrieves the byte containing the next bit to be read in the stream, with only
/// the bits that have already been read in it possibly set, and all the rest of the
/// bits cleared.
///
/// bitsAlreadyRead: the number of bits already read from the current byte
/// byteBeingRead: the byte currently being read, with any bits not read from it yet cleared (0'ed)
pub fn overhang(&self) -> (u8, u8) {
let bits_already_read = ((64 - self.num_bits) & 7) as u8; // already read bits in the current byte
let mask = (((1 << bits_already_read) - 1) << (8 - bits_already_read)) as u8;
return (bits_already_read, self.last_byte_read & mask);
}
}
#[cfg(test)]
use std::io::Cursor;
// Reads a short bit pattern containing an escaped 0xff (0xff 0x00) and checks the value,
// overhang and stream position reported along the way.
#[test]
fn read_simple() {
    let data = [0x12u8, 0x34, 0x45, 0x67, 0x89, 0xff, 0x00, 0xee];
    let mut reader = BitReader::new(Cursor::new(&data));

    // first nibble of 0x12: half of the byte has been consumed
    assert_eq!(reader.read(4).unwrap(), 1);
    assert_eq!(reader.overhang(), (4, 0x10));
    assert_eq!(reader.get_stream_position(), 0);

    // second nibble: the reader is byte aligned again, so there is no overhang
    assert_eq!(reader.read(4).unwrap(), 2);
    assert_eq!(reader.overhang(), (0, 0));
    assert_eq!(reader.get_stream_position(), 1);

    assert_eq!(reader.read(4).unwrap(), 3);
    assert_eq!(reader.read(4).unwrap(), 4);
    assert_eq!(reader.read(4).unwrap(), 4);

    // unaligned 8 bit reads straddle two bytes (e.g. 0x45 and 0x67)
    assert_eq!(reader.read(8).unwrap(), 0x56);
    assert_eq!(reader.read(8).unwrap(), 0x78);
    assert_eq!(reader.read(8).unwrap(), 0x9f);
    assert_eq!(reader.overhang(), (4, 0xf0));
    // position is the beginning of the escape code
    assert_eq!(reader.get_stream_position(), 5);

    assert_eq!(reader.read(8).unwrap(), 0xfe);
    assert_eq!(reader.overhang(), (4, 0xe0));
    // position is now after the escape code
    assert_eq!(reader.get_stream_position(), 7);

    assert_eq!(reader.read(4).unwrap(), 0xe);
    assert_eq!(reader.overhang(), (0, 0));
    // everything has been read, so we are at the end of the stream
    assert_eq!(reader.get_stream_position(), 8);

    // a byte read past the end of the stream is zero and triggers EOF
    assert_eq!(reader.read(8).unwrap(), 0);
    assert!(reader.is_eof());
    // the position does not move past the end
    assert_eq!(reader.get_stream_position(), 8);
}
// A stream whose last byte is 0xff: the trailing 0xff is assumed to be an escaped 0xff
// data byte, and EOF is only reported once a read goes past it.
#[test]
fn read_truncate_ff() {
    let data = [0x12u8, 0xff];
    let mut reader = BitReader::new(Cursor::new(&data));

    assert_eq!(reader.get_stream_position(), 0);
    assert_eq!(reader.read(4).unwrap(), 0x1);
    assert_eq!(reader.get_stream_position(), 0);

    assert_eq!(reader.read(8).unwrap(), 0x2f);
    assert_eq!(reader.overhang(), (4, 0xf0));
    assert_eq!(reader.get_stream_position(), 1);

    // 4 bits of the 0xff are left, so this is not EOF yet
    assert!(!reader.is_eof());
    assert_eq!(reader.read(4).unwrap(), 0xf);

    // all real data is consumed, but EOF is only flagged by a read past the end
    assert!(!reader.is_eof());
    assert_eq!(reader.get_stream_position(), 2);

    assert_eq!(reader.read(4).unwrap(), 0);
    assert!(reader.is_eof());
    assert_eq!(reader.get_stream_position(), 2);
}