git_pack/data/output/entry/
mod.rs

1use std::{convert::TryFrom, io::Write};
2
3use git_hash::ObjectId;
4
5use crate::{data, data::output, find};
6
7///
8pub mod iter_from_counts;
9pub use iter_from_counts::function::iter_from_counts;
10
11/// The kind of pack entry to be written
12#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
13#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
14pub enum Kind {
15    /// A complete base object, including its kind
16    Base(git_object::Kind),
17    /// A delta against the object with the given index. It's always an index that was already encountered to refer only
18    /// to object we have written already.
19    DeltaRef {
20        /// The absolute index to the object to serve as base. It's up to the writer to maintain enough state to allow producing
21        /// a packed delta object from it.
22        object_index: usize,
23    },
24    /// A delta against the given object as identified by its `ObjectId`.
25    /// This is the case for thin packs only, i.e. those that are sent over the wire.
26    /// Note that there is the option of the `ObjectId` being used to refer to an object within
27    /// the same pack, but it's a discontinued practice which won't be encountered here.
28    DeltaOid {
29        /// The object serving as base for this delta
30        id: ObjectId,
31    },
32}
33
34/// The error returned by [`output::Entry::from_data()`].
35#[allow(missing_docs)]
36#[derive(Debug, thiserror::Error)]
37pub enum Error {
38    #[error("{0}")]
39    ZlibDeflate(#[from] std::io::Error),
40}
41
42impl output::Entry {
43    /// An object which can be identified as invalid easily which happens if objects didn't exist even if they were referred to.
44    pub fn invalid() -> output::Entry {
45        output::Entry {
46            id: git_hash::Kind::Sha1.null(), // NOTE: the actual object hash used in the repo doesn't matter here, this is a sentinel value.
47            kind: Kind::Base(git_object::Kind::Blob),
48            decompressed_size: 0,
49            compressed_data: vec![],
50        }
51    }
52
53    /// Returns true if this object doesn't really exist but still has to be handled responsibly
54    ///
55    /// Note that this is true for tree entries that are commits/git submodules, or for objects which aren't present in our local clone
56    /// due to shallow clones.
57    pub fn is_invalid(&self) -> bool {
58        self.id.is_null()
59    }
60
61    /// Create an Entry from a previously counted object which is located in a pack. It's `entry` is provided here.
62    /// The `version` specifies what kind of target `Entry` version the caller desires.
63    pub fn from_pack_entry(
64        mut entry: find::Entry,
65        count: &output::Count,
66        potential_bases: &[output::Count],
67        bases_index_offset: usize,
68        pack_offset_to_oid: Option<impl FnMut(u32, u64) -> Option<ObjectId>>,
69        target_version: crate::data::Version,
70    ) -> Option<Result<Self, Error>> {
71        if entry.version != target_version {
72            return None;
73        };
74
75        let pack_offset_must_be_zero = 0;
76        let pack_entry =
77            crate::data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.as_slice().len());
78
79        use crate::data::entry::Header::*;
80        match pack_entry.header {
81            Commit => Some(output::entry::Kind::Base(git_object::Kind::Commit)),
82            Tree => Some(output::entry::Kind::Base(git_object::Kind::Tree)),
83            Blob => Some(output::entry::Kind::Base(git_object::Kind::Blob)),
84            Tag => Some(output::entry::Kind::Base(git_object::Kind::Tag)),
85            OfsDelta { base_distance } => {
86                let pack_location = count.entry_pack_location.as_ref().expect("packed");
87                let base_offset = pack_location
88                    .pack_offset
89                    .checked_sub(base_distance)
90                    .expect("pack-offset - distance is firmly within the pack");
91                potential_bases
92                    .binary_search_by(|e| {
93                        e.entry_pack_location
94                            .as_ref()
95                            .expect("packed")
96                            .pack_offset
97                            .cmp(&base_offset)
98                    })
99                    .ok()
100                    .map(|idx| output::entry::Kind::DeltaRef {
101                        object_index: idx + bases_index_offset,
102                    })
103                    .or_else(|| {
104                        pack_offset_to_oid
105                            .and_then(|mut f| f(pack_location.pack_id, base_offset))
106                            .map(|id| output::entry::Kind::DeltaOid { id })
107                    })
108            }
109            RefDelta { base_id: _ } => None, // ref deltas are for thin packs or legacy, repack them as base objects
110        }
111        .map(|kind| {
112            Ok(output::Entry {
113                id: count.id.to_owned(),
114                kind,
115                decompressed_size: pack_entry.decompressed_size as usize,
116                compressed_data: {
117                    entry.data.copy_within(pack_entry.data_offset as usize.., 0);
118                    entry.data.resize(
119                        entry.data.len()
120                            - usize::try_from(pack_entry.data_offset).expect("offset representable as usize"),
121                        0,
122                    );
123                    entry.data
124                },
125            })
126        })
127    }
128
129    /// Create a new instance from the given `oid` and its corresponding git `obj`ect data.
130    pub fn from_data(count: &output::Count, obj: &git_object::Data<'_>) -> Result<Self, Error> {
131        Ok(output::Entry {
132            id: count.id.to_owned(),
133            kind: Kind::Base(obj.kind),
134            decompressed_size: obj.data.len(),
135            compressed_data: {
136                let mut out = git_features::zlib::stream::deflate::Write::new(Vec::new());
137                if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) {
138                    match err.kind() {
139                        std::io::ErrorKind::Other => return Err(Error::ZlibDeflate(err)),
140                        err => unreachable!("Should never see other errors than zlib, but got {:?}", err,),
141                    }
142                };
143                out.flush()?;
144                out.into_inner()
145            },
146        })
147    }
148
149    /// Transform ourselves into pack entry header of `version` which can be written into a pack.
150    ///
151    /// `index_to_pack(object_index) -> pack_offset` is a function to convert the base object's index into
152    /// the input object array (if each object is numbered) to an offset into the pack.
153    /// This information is known to the one calling the method.
154    pub fn to_entry_header(
155        &self,
156        version: crate::data::Version,
157        index_to_base_distance: impl FnOnce(usize) -> u64,
158    ) -> crate::data::entry::Header {
159        assert!(
160            matches!(version, data::Version::V2),
161            "we can only write V2 pack entries for now"
162        );
163
164        use Kind::*;
165        match self.kind {
166            Base(kind) => {
167                use git_object::Kind::*;
168                match kind {
169                    Tree => data::entry::Header::Tree,
170                    Blob => data::entry::Header::Blob,
171                    Commit => data::entry::Header::Commit,
172                    Tag => data::entry::Header::Tag,
173                }
174            }
175            DeltaOid { id } => data::entry::Header::RefDelta { base_id: id.to_owned() },
176            DeltaRef { object_index } => data::entry::Header::OfsDelta {
177                base_distance: index_to_base_distance(object_index),
178            },
179        }
180    }
181}