lzma/reader/
reader.rs

1use std::io::{self, Write, Read, Cursor};
2
3use {Error, Properties, properties};
4use consts::{LENGTH_TO_POSITION_STATES, ALIGN_BITS, END_POSITION_MODEL_INDEX};
5use consts::{FULL_DISTANCES, STATES, POSITION_BITS_MAX, MATCH_MINIMUM_LENGTH};
6use super::{Range, Window, Length, Probabilities, BitTree, State, Cache};
7
/// A LZMA stream reader.
///
/// Wraps a `Read` stream and owns the whole decoder state: the range decoder,
/// the output window and every probability model used while decoding.
#[derive(Debug)]
pub struct Reader<R: Read> {
	// the underlying compressed stream
	stream:  R,
	// number of bytes decoded so far, compared against the declared
	// uncompressed size when there is one
	decoded: u64,

	// model properties currently in use
	properties: Properties,

	// using an optional buffer and offset for leftovers so we avoid useless
	// allocations and reallocations
	buffer: Option<Vec<u8>>,
	offset: usize,

	// range decoder and output window
	range:  Range,
	window: Window,

	// probabilities used to decode literals and the low bits of distances
	literal:  Probabilities,
	position: Probabilities,

	// length decoders, `length` for plain matches and `repeat` for repetitions
	length: Length,
	repeat: Length,

	// bit trees used to decode the distance slot (one per length state) and
	// the alignment bits of a distance
	slot:  Vec<BitTree>,
	align: BitTree,

	// current state and the four most recently used distances
	state: u32,
	rep:   [u32; 4],

	// probabilities of the decision bits selecting the kind of unit to decode
	is_match:     Probabilities,
	is_rep:       Probabilities,
	is_rep_g0:    Probabilities,
	is_rep_g1:    Probabilities,
	is_rep_g2:    Probabilities,
	is_rep0_long: Probabilities,
}
43
44impl<R: Read> Reader<R> {
45	/// Creates a LZMA reader with the given model properties and the given
46	/// stream.
47	pub fn new(stream: R, properties: Properties) -> Result<Reader<R>, Error> {
48		let window = Window::new(properties.dictionary);
49
50		let literal = Probabilities::new(0x300 << (properties.lc + properties.lp));
51
52		Ok(Reader {
53			stream:  stream,
54			decoded: 0,
55
56			properties: properties,
57
58			buffer: None,
59			offset: 0,
60
61			range:  Range::empty(),
62			window: window,
63
64			literal:  literal,
65			position: Probabilities::new(1 + FULL_DISTANCES - END_POSITION_MODEL_INDEX),
66
67			length: Length::new(),
68			repeat: Length::new(),
69
70			slot:  vec![BitTree::new(6); LENGTH_TO_POSITION_STATES],
71			align: BitTree::new(ALIGN_BITS),
72
73			state: 0,
74			rep:   [0; 4],
75
76			is_match:     Probabilities::new(STATES << POSITION_BITS_MAX),
77			is_rep:       Probabilities::new(STATES),
78			is_rep_g0:    Probabilities::new(STATES),
79			is_rep_g1:    Probabilities::new(STATES),
80			is_rep_g2:    Probabilities::new(STATES),
81			is_rep0_long: Probabilities::new(STATES << POSITION_BITS_MAX),
82		})
83	}
84
85	/// Creates a LZMA stream from the given stream, reading the model
86	/// properties.
87	pub fn from(mut stream: R) -> Result<Reader<R>, Error> {
88		let properties = try!(properties::read(stream.by_ref()));
89
90		Reader::new(stream, properties)
91	}
92
93	/// Returns the model properties.
94	pub fn properties(&self) -> &Properties {
95		&self.properties
96	}
97
98	/// Returns the size of the internal cache.
99	pub fn cached(&self) -> usize {
100		if let Some(buffer) = self.buffer.as_ref() {
101			buffer.len() - self.offset
102		}
103		else {
104			0
105		}
106	}
107
108	/// Unwraps this `Reader`, returning the underlying reader.
109	///
110	/// Note that any leftover data in the internal buffer is lost.
111	pub fn into_inner(self) -> R {
112		self.stream
113	}
114
115	/// Returns the inner stream mutably.
116	///
117	/// Note that reading bytes from the raw stream might corrupt the decoding.
118	pub unsafe fn inner(&mut self) -> &mut R {
119		&mut self.stream
120	}
121
122	/// Sets the uncompressed size.
123	///
124	/// Note that changing the uncompressed size might corrupt the decoding.
125	pub unsafe fn set_uncompressed(&mut self, value: Option<u64>) {
126		self.properties.uncompressed = value;
127	}
128
129	/// Resets the decoder.
130	///
131	/// Note that resetting might corrupt the decoding.
132	pub unsafe fn reset(&mut self, properties: Option<Properties>) {
133		if let Some(props) = properties {
134			self.properties.lc = props.lc;
135			self.properties.lp = props.lp;
136			self.properties.pb = props.pb;
137
138			self.literal = Probabilities::new(0x300 << (props.lc + props.lp));
139		}
140		else {
141			self.decoded = 0;
142
143			self.range.reset();
144			self.window.reset();
145
146			self.position.reset();
147
148			self.length.reset();
149			self.repeat.reset();
150
151			for bt in &mut self.slot {
152				bt.reset();
153			}
154
155			self.align.reset();
156
157			self.state = 0;
158			self.rep   = [0; 4];
159
160			self.is_match.reset();
161			self.is_rep.reset();
162			self.is_rep_g0.reset();
163			self.is_rep_g1.reset();
164			self.is_rep_g2.reset();
165			self.is_rep0_long.reset();
166		}
167	}
168
169	fn distance(&mut self, length: usize) -> Result<usize, Error> {
170		let state = if length > LENGTH_TO_POSITION_STATES - 1 {
171			LENGTH_TO_POSITION_STATES - 1
172		}
173		else {
174			length
175		};
176
177		let slot = try!(self.slot[state].decode(self.stream.by_ref(), &mut self.range));
178
179		if slot < 4 {
180			return Ok(slot);
181		}
182
183		let     direct   = (slot >> 1) - 1;
184		let mut distance = (2 | (slot & 1)) << direct;
185
186		if slot < END_POSITION_MODEL_INDEX {
187			distance += try!(super::probabilities::reverse(self.stream.by_ref(),
188				&mut self.position[distance - slot ..], direct, &mut self.range));
189		}
190		else {
191			distance += try!(self.range.direct(self.stream.by_ref(), direct - ALIGN_BITS)) << ALIGN_BITS;
192			distance += try!(self.align.reverse(self.stream.by_ref(), &mut self.range));
193		}
194
195		Ok(distance as usize)
196	}
197
198	fn literal<W: Write>(&mut self, writer: W, state: usize, rep0: u32) -> Result<(), Error> {
199		let prev = if !self.window.is_empty() {
200			self.window[1] as u32
201		}
202		else {
203			0
204		};
205
206		// it will contain the final byte with an additional 9th control bit
207		let mut byte = 1u32;
208
209		let lit = ((self.window.total() & ((1 << self.properties.lp) - 1)) << self.properties.lc)
210			+ (prev >> (8 - self.properties.lc as u32));
211
212		let probs = &mut self.literal[0x300 * lit as usize ..];
213
214		// we have to use the distance
215		if state >= 7 {
216			let mut match_byte = self.window[rep0 + 1];
217
218			while byte < 0b1_0000_0000 {
219				let match_bit = (match_byte >> 7) & 1;
220				match_byte <<= 1;
221
222				let bit = try!(self.range.probabilistic(self.stream.by_ref(),
223					&mut probs[(((1 + match_bit as u32) << 8) + byte) as usize]));
224
225				byte <<= 1;
226				byte  |= if bit { 1 } else { 0 };
227
228				if match_bit != if bit { 1 } else { 0 } {
229					break;
230				}
231			}
232		}
233
234		while byte < 0b1_0000_0000 {
235			let bit = try!(self.range.probabilistic(self.stream.by_ref(), &mut probs[byte as usize]));
236
237			byte <<= 1;
238			byte  |= if bit { 1 } else { 0 };
239		}
240
241		self.window.push(writer, byte as u8)
242	}
243
244	/// Decode one unit and return the decoded amount.
245	///
246	/// Note the writer should not do partial writes, or some of the decoded data
247	/// will be lost.
248	pub fn decode<W: Write>(&mut self, mut writer: W) -> Result<usize, Error> {
249		if !self.range.is_seeded() {
250			try!(self.range.seed(self.stream.by_ref()));
251		}
252
253		if let Some(size) = self.properties.uncompressed {
254			if self.decoded == size {
255				return Ok(0);
256			}
257		}
258		else {
259			if self.range.is_finished() {
260				return Err(Error::MissingMarker);
261			}
262		}
263
264		let pos = self.window.total() & ((1 << self.properties.pb) - 1);
265
266		if !try!(self.range.probabilistic(self.stream.by_ref(), &mut self.is_match[((pos << POSITION_BITS_MAX) + self.state) as usize])) {
267			// check if there's more data to read
268			if let Some(size) = self.properties.uncompressed {
269				if self.decoded == size {
270					return Err(Error::HasMoreData);
271				}
272			}
273
274			let rep   = self.rep[0];
275			let state = self.state;
276			try!(self.literal(writer.by_ref(), state as usize, rep));
277
278			self.state    = State::Literal(self.state).update();
279			self.decoded += 1;
280
281			return Ok(1);
282		}
283
284		let mut length;
285
286		if try!(self.range.probabilistic(self.stream.by_ref(), &mut self.is_rep[self.state as usize])) {
287			// check if there's more data to read
288			if let Some(size) = self.properties.uncompressed {
289				if self.decoded == size {
290					return Err(Error::HasMoreData);
291				}
292			}
293
294			if self.window.is_empty() {
295				return Err(Error::HasMoreData);
296			}
297
298			if !try!(self.range.probabilistic(self.stream.by_ref(), &mut self.is_rep_g0[self.state as usize])) {
299				if !try!(self.range.probabilistic(self.stream.by_ref(), &mut self.is_rep0_long[((self.state << POSITION_BITS_MAX) + pos) as usize])) {
300					let byte = self.window[self.rep[0] + 1];
301					try!(self.window.push(writer.by_ref(), byte));
302
303					self.state    = State::ShortRepetition(self.state).update();
304					self.decoded += 1;
305
306					return Ok(1);
307				}
308			}
309			else {
310				let distance;
311				
312				if !try!(self.range.probabilistic(self.stream.by_ref(), &mut self.is_rep_g1[self.state as usize])) {
313					distance = self.rep[1];
314				}
315				else {
316					if !try!(self.range.probabilistic(self.stream.by_ref(), &mut self.is_rep_g2[self.state as usize])) {
317						distance = self.rep[2];
318					}
319					else {
320						distance    = self.rep[3];
321						self.rep[3] = self.rep[2];
322					}
323
324					self.rep[2] = self.rep[1];
325				}
326
327				self.rep[1] = self.rep[0];
328				self.rep[0] = distance;
329			}
330
331			length = try!(self.repeat.decode(self.stream.by_ref(), &mut self.range, pos as usize));
332
333			self.state = State::Repetition(self.state).update();
334		}
335		else {
336			length = try!(self.length.decode(self.stream.by_ref(), &mut self.range, pos as usize));
337
338			self.rep[3] = self.rep[2];
339			self.rep[2] = self.rep[1];
340			self.rep[1] = self.rep[0];
341			self.rep[0] = try!(self.distance(length)) as u32;
342
343			// EOS marker found
344			if self.rep[0] == 0xffffffff {
345				// if the range finished correctly
346				if self.range.is_finished() {
347					// return error if EOS when the uncompressed size is defined
348					if let Some(size) = self.properties.uncompressed {
349						if self.decoded != size {
350							return Err(Error::NeedMoreData);
351						}
352					}
353
354					// return EOF
355					return Ok(0);
356				}
357				else {
358					return Err(Error::NeedMoreData);
359				}
360			}
361
362			if self.rep[0] >= self.properties.dictionary || !self.window.check(self.rep[0]) {
363				return Err(Error::Corrupted);
364			}
365
366			self.state = State::Match(self.state).update();
367		}
368
369		length += MATCH_MINIMUM_LENGTH;
370
371		if let Some(size) = self.properties.uncompressed {
372			if self.decoded + length as u64 > size {
373				return Err(Error::HasMoreData);
374			}
375		}
376
377		try!(self.window.copy(writer.by_ref(), self.rep[0] + 1, length));
378		self.decoded += length as u64;
379
380		Ok(length)
381	}
382}
383
384impl<R: Read> Read for Reader<R> {
385	fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
386		if buf.len() == 0 {
387			return Ok(0);
388		}
389
390		let     length = buf.len();
391		let mut target = Cursor::new(buf);
392
393		// we have some leftovers from the previous decode, try to flush those
394		if let Some(buffer) = self.buffer.take() {
395			let written  = try!(target.write(&buffer[self.offset..]));
396			self.offset += written;
397
398			if self.offset == buffer.len() {
399				self.buffer = None;
400			}
401			else {
402				self.buffer = Some(buffer);
403			}
404
405			return Ok(written);
406		}
407
408		let mut cache = Cache::new(target);
409
410		match self.decode(&mut cache) {
411			Err(Error::IO(err)) =>
412				Err(err),
413
414			Err(err) =>
415				Err(io::Error::new(io::ErrorKind::Other, err)),
416
417			Ok(0) =>
418				Ok(0),
419
420			Ok(written) => {
421				if let Some(cache) = cache.into_inner() {
422					self.buffer = Some(cache);
423					self.offset = 0;
424
425					Ok(length)
426				}
427				else {
428					Ok(written)
429				}
430			}
431		}
432	}
433}