1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
use super::stream;
use crate::{loose, zlib};
use git_object as object;
use miniz_oxide::inflate::decompress_to_vec_zlib;
use object::borrowed;
use smallvec::SmallVec;
use std::{io::Read, path::PathBuf};

/// Returned by [`loose::Object::decode()`] and [`loose::Object::stream()`]
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
    /// Wraps a [`zlib::Error`]; `#[from]` generates the `From` conversion so `?` can produce it.
    // NOTE(review): nothing in this part of the file constructs this variant explicitly —
    // presumably it is produced elsewhere (e.g. by the streaming reader); confirm.
    #[error("decompression of object data failed")]
    Decompress(#[from] zlib::Error),
    /// The decompressed payload was rejected by `borrowed::Object::from_bytes()`.
    ///
    /// `transparent` forwards both the message and the error source to the wrapped error.
    #[error(transparent)]
    Parse(#[from] borrowed::Error),
    /// A file-system operation on the object's backing file failed.
    #[error("Could not {action} data at '{path}'")]
    Io {
        /// The underlying I/O error.
        source: std::io::Error,
        /// The operation that failed, spliced into the message
        /// (this file uses "open", "read metadata" and "read").
        action: &'static str,
        /// The path of the file the operation was attempted on.
        path: PathBuf,
    },
}

// Decoding and streaming
impl loose::Object {
    /// Decode the object to make it's fields accessible in case of Trees, Tags and Commits.
    ///
    /// This is a zero-copy operation with data read from disk if needed and stored in memory.
    /// The returned [`borrowed::Object`] references this data where possible.
    ///
    /// **Note**: Blobs are also loaded into memory and are made available that way.
    /// Consider using `stream()` if large Blobs are expected.
    pub fn decode(&mut self) -> Result<borrowed::Object<'_>, Error> {
        self.decompress_all()?;
        let bytes = &self.decompressed_data[self.header_size..];
        Ok(borrowed::Object::from_bytes(self.kind, bytes)?)
    }

    /// Returns an implementation of [`std::io::Read`], which decompresses the objects data on the fly.
    ///
    /// **Note**: This is most useful for big blobs as these won't be read into memory in full. Use [`decode()`][loose::Object::decode()] for
    /// Trees, Tags and Commits instead for convenient access to their payload.
    pub fn stream(&mut self) -> Result<stream::Reader<'_>, Error> {
        match &self.path {
            Some(path) => Ok(stream::Reader::from_file(
                self.header_size,
                std::fs::File::open(path).map_err(|source| Error::Io {
                    source,
                    action: "open",
                    path: path.to_owned(),
                })?,
            )),
            None => {
                self.decompress_all()?;
                Ok(stream::Reader::from_data(
                    self.header_size,
                    &self.decompressed_data.as_slice(),
                ))
            }
        }
    }

    pub(crate) fn decompress_all(&mut self) -> Result<(), Error> {
        if self.decompression_complete {
            debug_assert!(
                self.size + self.header_size == self.decompressed_data.len(),
                "when decompression is done, we have stored everything in memory"
            );
            return Ok(());
        }
        let total_size = self.header_size + self.size;
        if let Some(path) = self.path.take() {
            // NOTE: For now we just re-read everything from the beginning without seeking, as our buffer
            // is small so the seek might be more expensive than just reading everything.
            let mut file = std::fs::File::open(&path).map_err(|source| Error::Io {
                source,
                action: "open",
                path: path.clone(),
            })?;
            let file_size = file
                .metadata()
                .map_err(|source| Error::Io {
                    source,
                    action: "read metadata",
                    path: path.clone(),
                })?
                .len() as usize;
            let mut buf = Vec::with_capacity(file_size);
            file.read_to_end(&mut buf).map_err(|source| Error::Io {
                source,
                action: "read",
                path,
            })?;
            self.compressed_data = SmallVec::from(buf);
        }
        self.decompressed_data = SmallVec::from(decompress_to_vec_zlib(&self.compressed_data[..]).unwrap());
        self.compressed_data = Default::default();
        self.decompressed_data.shrink_to_fit();
        assert!(self.decompressed_data.len() == total_size);
        self.decompression_complete = true;
        Ok(())
    }
}