Skip to main content

gitoxide_core/pack/
explode.rs

1use std::{
2    fs,
3    io::Read,
4    path::Path,
5    sync::{Arc, atomic::AtomicBool},
6};
7
8use anyhow::{Result, anyhow};
9use gix::{
10    NestedProgress,
11    hash::ObjectId,
12    object, objs, odb,
13    odb::{loose, pack},
14    prelude::Write,
15};
16
/// Selects how much verification is performed while a pack is traversed.
///
/// Every variant is converted into the same-named variant of
/// `pack::index::traverse::SafetyCheck`, which is what the traversal consumes.
/// The variant names state which verification steps are left out.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub enum SafetyCheck {
    /// Parsed from the string `skip-file-checksum`.
    SkipFileChecksumVerification,
    /// Parsed from the string `skip-file-and-object-checksum`.
    SkipFileAndObjectChecksumVerification,
    /// Parsed from the string `skip-file-and-object-checksum-and-no-abort-on-decode`.
    SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError,
    /// Parsed from the string `all`; this is the value produced by `Default`.
    #[default]
    All,
}
25
26impl SafetyCheck {
27    pub fn variants() -> &'static [&'static str] {
28        &[
29            "all",
30            "skip-file-checksum",
31            "skip-file-and-object-checksum",
32            "skip-file-and-object-checksum-and-no-abort-on-decode",
33        ]
34    }
35}
36
37impl std::str::FromStr for SafetyCheck {
38    type Err = String;
39
40    fn from_str(s: &str) -> Result<Self, Self::Err> {
41        Ok(match s {
42            "skip-file-checksum" => SafetyCheck::SkipFileChecksumVerification,
43            "skip-file-and-object-checksum" => SafetyCheck::SkipFileAndObjectChecksumVerification,
44            "skip-file-and-object-checksum-and-no-abort-on-decode" => {
45                SafetyCheck::SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError
46            }
47            "all" => SafetyCheck::All,
48            _ => return Err(format!("Unknown value for safety check: '{s}'")),
49        })
50    }
51}
52
53impl From<SafetyCheck> for pack::index::traverse::SafetyCheck {
54    fn from(v: SafetyCheck) -> Self {
55        use pack::index::traverse::SafetyCheck::*;
56        match v {
57            SafetyCheck::All => All,
58            SafetyCheck::SkipFileChecksumVerification => SkipFileChecksumVerification,
59            SafetyCheck::SkipFileAndObjectChecksumVerification => SkipFileAndObjectChecksumVerification,
60            SafetyCheck::SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError => {
61                SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError
62            }
63        }
64    }
65}
66
67#[derive(Debug, thiserror::Error)]
68enum Error {
69    #[error("An IO error occurred while writing an object")]
70    Io(#[from] std::io::Error),
71    #[error("An object could not be written to the database")]
72    OdbWrite(#[from] loose::write::Error),
73    #[error("Failed to write {kind} object {id}")]
74    Write {
75        source: Box<dyn std::error::Error + Send + Sync>,
76        kind: object::Kind,
77        id: ObjectId,
78    },
79    #[error("Object didn't verify after right after writing it")]
80    Verify(#[from] objs::data::verify::Error),
81    #[error("{kind} object wasn't re-encoded without change")]
82    ObjectEncodeMismatch {
83        #[source]
84        source: gix::hash::verify::Error,
85        kind: object::Kind,
86    },
87    #[error("The recently written file for loose object {id} could not be found")]
88    WrittenFileMissing { id: ObjectId },
89    #[error("The recently written file for loose object {id} cold not be read")]
90    WrittenFileCorrupt { source: loose::find::Error, id: ObjectId },
91}
92
93#[allow(clippy::large_enum_variant)]
94#[derive(Clone)]
95enum OutputWriter {
96    Loose(loose::Store),
97    Sink(odb::Sink),
98}
99
100impl gix::objs::Write for OutputWriter {
101    fn write_buf(&self, kind: object::Kind, from: &[u8]) -> Result<ObjectId, gix::objs::write::Error> {
102        match self {
103            OutputWriter::Loose(db) => db.write_buf(kind, from),
104            OutputWriter::Sink(db) => db.write_buf(kind, from),
105        }
106    }
107
108    fn write_buf_with_known_id(
109        &self,
110        kind: object::Kind,
111        from: &[u8],
112        id: ObjectId,
113    ) -> Result<ObjectId, gix::objs::write::Error> {
114        match self {
115            OutputWriter::Loose(db) => db.write_buf_with_known_id(kind, from, id),
116            OutputWriter::Sink(db) => db.write_buf_with_known_id(kind, from, id),
117        }
118    }
119
120    fn write_stream(
121        &self,
122        kind: object::Kind,
123        size: u64,
124        from: &mut dyn Read,
125    ) -> Result<ObjectId, gix::objs::write::Error> {
126        match self {
127            OutputWriter::Loose(db) => db.write_stream(kind, size, from),
128            OutputWriter::Sink(db) => db.write_stream(kind, size, from),
129        }
130    }
131
132    fn write_stream_with_known_id(
133        &self,
134        kind: object::Kind,
135        size: u64,
136        from: &mut dyn Read,
137        id: ObjectId,
138    ) -> Result<ObjectId, gix::objs::write::Error> {
139        match self {
140            OutputWriter::Loose(db) => db.write_stream_with_known_id(kind, size, from, id),
141            OutputWriter::Sink(db) => db.write_stream_with_known_id(kind, size, from, id),
142        }
143    }
144}
145
146impl OutputWriter {
147    fn new(path: Option<impl AsRef<Path>>, compress: bool, object_hash: gix::hash::Kind) -> Self {
148        match path {
149            Some(path) => OutputWriter::Loose(loose::Store::at(path.as_ref(), object_hash, None)),
150            None => OutputWriter::Sink(odb::sink(object_hash).compress(compress)),
151        }
152    }
153}
154
155#[derive(Default)]
156pub struct Context {
157    pub thread_limit: Option<usize>,
158    pub delete_pack: bool,
159    pub sink_compress: bool,
160    pub verify: bool,
161    pub should_interrupt: Arc<AtomicBool>,
162    pub object_hash: gix::hash::Kind,
163}
164
165pub fn pack_or_pack_index(
166    pack_path: impl AsRef<Path>,
167    object_path: Option<impl AsRef<Path>>,
168    check: SafetyCheck,
169    mut progress: impl NestedProgress + 'static,
170    Context {
171        thread_limit,
172        delete_pack,
173        sink_compress,
174        verify,
175        should_interrupt,
176        object_hash,
177    }: Context,
178) -> Result<()> {
179    use anyhow::Context;
180
181    let path = pack_path.as_ref();
182    let bundle = pack::Bundle::at(path, object_hash).with_context(|| {
183        format!(
184            "Could not find .idx or .pack file from given file at '{}'",
185            path.display()
186        )
187    })?;
188
189    if !object_path.as_ref().is_none_or(|p| p.as_ref().is_dir()) {
190        return Err(anyhow!(
191            "The object directory at '{}' is inaccessible",
192            object_path
193                .expect("path present if no directory on disk")
194                .as_ref()
195                .display()
196        ));
197    }
198
199    let algorithm = object_path.as_ref().map_or_else(
200        || {
201            if sink_compress {
202                pack::index::traverse::Algorithm::Lookup
203            } else {
204                pack::index::traverse::Algorithm::DeltaTreeLookup
205            }
206        },
207        |_| pack::index::traverse::Algorithm::Lookup,
208    );
209
210    let pack::index::traverse::Outcome { .. } = bundle
211        .index
212        .traverse(
213            &bundle.pack,
214            &mut progress,
215            &should_interrupt,
216            {
217                let object_path = object_path.map(|p| p.as_ref().to_owned());
218                let out = OutputWriter::new(object_path.clone(), sink_compress, object_hash);
219                let loose_odb = verify
220                    .then(|| object_path.as_ref().map(|path| loose::Store::at(path, object_hash, None)))
221                    .flatten();
222                let mut read_buf = Vec::new();
223                move |object_kind, buf, index_entry, progress| {
224                    let written_id = out.write_buf(object_kind, buf).map_err(|err| Error::Write {
225                        source: err,
226                        kind: object_kind,
227                        id: index_entry.oid,
228                    })?;
229                    if let Err(err) = written_id.verify(&index_entry.oid) {
230                        if let object::Kind::Tree = object_kind {
231                            progress.info(format!(
232                                "The tree in pack named {} was written as {} due to modes 100664 and 100640 rewritten as 100644.",
233                                index_entry.oid, written_id
234                            ));
235                        } else {
236                            return Err(Error::ObjectEncodeMismatch {
237                                source: err,
238                                kind: object_kind,
239                            });
240                        }
241                    }
242                    if let Some(verifier) = loose_odb.as_ref() {
243                        let obj = verifier
244                            .try_find(&written_id, &mut read_buf)
245                            .map_err(|err| Error::WrittenFileCorrupt {
246                                source: err,
247                                id: written_id,
248                            })?
249                            .ok_or(Error::WrittenFileMissing { id: written_id })?;
250                        obj.verify_checksum(&written_id)?;
251                    }
252                    Ok(())
253                }
254            },
255            pack::index::traverse::Options {
256                traversal: algorithm,
257                thread_limit,
258                check: check.into(),
259                make_pack_lookup_cache: pack::cache::lru::StaticLinkedList::<64>::default,
260            },
261        )
262        .with_context(|| "Failed to explode the entire pack - some loose objects may have been created nonetheless")?;
263
264    let (index_path, data_path) = (bundle.index.path().to_owned(), bundle.pack.path().to_owned());
265    drop(bundle);
266
267    if delete_pack {
268        fs::remove_file(&index_path)
269            .and_then(|_| fs::remove_file(&data_path))
270            .with_context(|| {
271                format!(
272                    "Failed to delete pack index file at '{} or data file at '{}'",
273                    index_path.display(),
274                    data_path.display()
275                )
276            })?;
277        progress.info(format!(
278            "Removed '{}' and '{}'",
279            index_path.display(),
280            data_path.display()
281        ));
282    }
283    Ok(())
284}