1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
// Copyright 2022 Red Hat
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Implementation of an API similar to zstd::stream::read::Decoder using
// zstd::stream::raw::Decoder. We need this because read::Decoder returns
// io::ErrorKind::Other if there's trailing data after a zstd stream, which
// can't be disambiguated from an actual error. By using the low-level API,
// we can check zstd::stream::raw::Status.remaining to see whether the
// decoder thinks it's at the end of a frame, check the upcoming bytes for
// the magic number of another frame, and decide whether we're done. The
// raw decoder always stops at frame boundaries, so this is reliable. If
// done, return Ok(0) and allow the caller to decide what it wants to do
// about trailing data.
use anyhow::{Context, Result};
use bytes::{Buf, BytesMut};
use std::io::{self, BufRead, Error, ErrorKind, Read};
use zstd::stream::raw::{Decoder, Operation};
use zstd::zstd_safe::{MAGICNUMBER, MAGIC_SKIPPABLE_MASK, MAGIC_SKIPPABLE_START};
use crate::io::PeekReader;
/// Streaming zstd decompressor that implements [`Read`] and stops cleanly
/// at the end of the compressed data instead of failing on trailing bytes.
pub struct ZstdStreamDecoder<'a, R>
where
    R: Read,
{
    /// Reader supplying the compressed input; also used to peek at the
    /// next bytes when a new frame is expected.
    source: PeekReader<R>,
    /// Decompressed bytes that have been produced by the decoder but not
    /// yet copied out to a caller of `read`.
    buf: BytesMut,
    /// Low-level zstd decoder that is fed directly from `source`.
    decoder: Decoder<'a>,
    /// True when the next input bytes are expected to begin a new frame,
    /// i.e. before any input has been decoded and after the decoder
    /// reports that the current frame is complete.
    start_of_frame: bool,
}
impl<'a, R> ZstdStreamDecoder<'a, R>
where
    R: Read,
{
    /// Builds a decoder that reads compressed data from `source`.
    ///
    /// # Errors
    ///
    /// Fails if the underlying zstd decoder cannot be created.
    pub fn new(source: PeekReader<R>) -> Result<Self> {
        let decoder = Decoder::new().context("creating zstd decoder")?;
        let buf = BytesMut::new();
        Ok(Self {
            source,
            buf,
            decoder,
            start_of_frame: true,
        })
    }

    /// Gives mutable access to the wrapped reader.
    pub fn get_mut(&mut self) -> &mut PeekReader<R> {
        let Self { source, .. } = self;
        source
    }

    /// Consumes the decoder and hands back the wrapped reader.
    ///
    /// Any decompressed bytes still held internally are dropped.
    pub fn into_inner(self) -> PeekReader<R> {
        let Self { source, .. } = self;
        source
    }
}
impl<R: Read> Read for ZstdStreamDecoder<'_, R> {
    /// Reads decompressed bytes into `out`.
    ///
    /// Returns `Ok(0)` when `out` is empty, or when a new frame is expected
    /// but the next four input bytes are missing or are not a zstd magic
    /// number. In the latter case the non-matching bytes are only peeked,
    /// not consumed, so the caller can decide what to do with trailing data.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::UnexpectedEof` if the input ends in the middle of
    /// a frame, and propagates any error from the underlying reader or from
    /// the zstd decoder.
    fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
        // Size of the intermediate output buffer. It has to be
        // zero-initialized before every decoder call, so it is kept
        // moderately small to limit the cost of that reinitialization.
        const OUTPUT_CHUNK_SIZE: usize = 16384;

        if out.is_empty() {
            return Ok(0);
        }
        loop {
            // Hand out previously decoded bytes before decoding any more.
            if !self.buf.is_empty() {
                let count = self.buf.len().min(out.len());
                self.buf.copy_to_slice(&mut out[..count]);
                return Ok(count);
            }

            if self.start_of_frame {
                // Only continue if another frame follows; anything else
                // (including fewer than four remaining bytes) is treated as
                // the end of the compressed data.
                let at_frame = match self.source.peek(4)?.get(..4) {
                    Some(&[a, b, c, d]) => is_zstd_magic([a, b, c, d]),
                    _ => false,
                };
                if !at_frame {
                    // end of compressed data
                    return Ok(0);
                }
                self.start_of_frame = false;
            }

            let in_ = self.source.fill_buf()?;
            if in_.is_empty() {
                return Err(Error::new(
                    ErrorKind::UnexpectedEof,
                    "premature EOF reading zstd frame",
                ));
            }

            // unfortunately we have to initialize to 0 for safety
            self.buf.resize(OUTPUT_CHUNK_SIZE, 0);
            let status = match self.decoder.run_on_buffers(in_, &mut self.buf) {
                Ok(status) => status,
                Err(err) => {
                    // Drop the zero padding; otherwise a later call would
                    // return it to the caller as if it were decoded data.
                    self.buf.clear();
                    return Err(err);
                }
            };
            self.source.consume(status.bytes_read);
            self.buf.truncate(status.bytes_written);
            if status.remaining == 0 {
                // The decoder considers the current frame finished, so the
                // next input must start a new frame or be trailing data.
                self.start_of_frame = true;
            }
        }
    }
}
/// Reports whether `buf`, interpreted as a little-endian `u32`, is the
/// magic number of a zstd frame or falls in the skippable-frame magic range.
pub fn is_zstd_magic(buf: [u8; 4]) -> bool {
    let magic = u32::from_le_bytes(buf);
    if magic == MAGICNUMBER {
        return true;
    }
    // Skippable frames are identified by the masked bits of the magic.
    (magic & MAGIC_SKIPPABLE_MASK) == MAGIC_SKIPPABLE_START
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a fixture one byte at a time through a one-byte peek buffer,
    /// then checks that the trailing non-zstd bytes are left unread in the
    /// underlying reader.
    #[test]
    fn small_decode() {
        let mut compressed = include_bytes!("../../fixtures/verify/1M.zst").to_vec();
        // Compute the reference output before appending the trailing bytes.
        let uncompressed = zstd::stream::decode_all(compressed.as_slice()).unwrap();
        compressed.extend_from_slice(b"abcdefg");

        let reader = PeekReader::with_capacity(1, compressed.as_slice());
        let mut d = ZstdStreamDecoder::new(reader).unwrap();

        let mut out = Vec::new();
        let mut byte = [0u8];
        loop {
            let n = d.read(&mut byte).unwrap();
            if n == 0 {
                break;
            }
            if n != 1 {
                unreachable!();
            }
            out.push(byte[0]);
        }
        assert_eq!(out, uncompressed);

        let mut remainder = Vec::new();
        d.into_inner().read_to_end(&mut remainder).unwrap();
        assert_eq!(remainder, b"abcdefg");
    }
}