gitoxide_core/pack/
explode.rs

1use std::{
2    fs,
3    io::Read,
4    path::Path,
5    sync::{atomic::AtomicBool, Arc},
6};
7
8use anyhow::{anyhow, Result};
9use gix::{
10    hash::ObjectId,
11    object, objs, odb,
12    odb::{loose, pack},
13    prelude::Write,
14    NestedProgress,
15};
16
/// The level of safety checks to perform while traversing a pack, from fully
/// verified (`All`, the default) down to skipping checksums and decode aborts.
#[derive(Default, Clone, Eq, PartialEq, Debug)]
pub enum SafetyCheck {
    /// Don't verify the checksum of the pack/index files themselves.
    SkipFileChecksumVerification,
    /// Additionally skip per-object checksum verification.
    SkipFileAndObjectChecksumVerification,
    /// Additionally keep going when an object fails to decode.
    SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError,
    /// Perform all available checks (the default).
    #[default]
    All,
}
25
26impl SafetyCheck {
27    pub fn variants() -> &'static [&'static str] {
28        &[
29            "all",
30            "skip-file-checksum",
31            "skip-file-and-object-checksum",
32            "skip-file-and-object-checksum-and-no-abort-on-decode",
33        ]
34    }
35}
36
37impl std::str::FromStr for SafetyCheck {
38    type Err = String;
39
40    fn from_str(s: &str) -> Result<Self, Self::Err> {
41        Ok(match s {
42            "skip-file-checksum" => SafetyCheck::SkipFileChecksumVerification,
43            "skip-file-and-object-checksum" => SafetyCheck::SkipFileAndObjectChecksumVerification,
44            "skip-file-and-object-checksum-and-no-abort-on-decode" => {
45                SafetyCheck::SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError
46            }
47            "all" => SafetyCheck::All,
48            _ => return Err(format!("Unknown value for safety check: '{s}'")),
49        })
50    }
51}
52
53impl From<SafetyCheck> for pack::index::traverse::SafetyCheck {
54    fn from(v: SafetyCheck) -> Self {
55        use pack::index::traverse::SafetyCheck::*;
56        match v {
57            SafetyCheck::All => All,
58            SafetyCheck::SkipFileChecksumVerification => SkipFileChecksumVerification,
59            SafetyCheck::SkipFileAndObjectChecksumVerification => SkipFileAndObjectChecksumVerification,
60            SafetyCheck::SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError => {
61                SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError
62            }
63        }
64    }
65}
66
67#[derive(Debug, thiserror::Error)]
68enum Error {
69    #[error("An IO error occurred while writing an object")]
70    Io(#[from] std::io::Error),
71    #[error("An object could not be written to the database")]
72    OdbWrite(#[from] loose::write::Error),
73    #[error("Failed to write {kind} object {id}")]
74    Write {
75        source: Box<dyn std::error::Error + Send + Sync>,
76        kind: object::Kind,
77        id: ObjectId,
78    },
79    #[error("Object didn't verify after right after writing it")]
80    Verify(#[from] objs::data::verify::Error),
81    #[error("{kind} object wasn't re-encoded without change")]
82    ObjectEncodeMismatch {
83        #[source]
84        source: gix::hash::verify::Error,
85        kind: object::Kind,
86    },
87    #[error("The recently written file for loose object {id} could not be found")]
88    WrittenFileMissing { id: ObjectId },
89    #[error("The recently written file for loose object {id} cold not be read")]
90    WrittenFileCorrupt { source: loose::find::Error, id: ObjectId },
91}
92
/// Where exploded objects end up: a real loose object database, or a sink
/// when no output directory was given (validation-only runs).
// NOTE(review): variant sizes differ considerably, but only a single instance
// exists per traversal, so the lint is suppressed rather than boxing a variant.
#[allow(clippy::large_enum_variant)]
#[derive(Clone)]
enum OutputWriter {
    // Writes objects as loose files on disk.
    Loose(loose::Store),
    // Consumes objects without persisting them — presumably discards after
    // optional compression; see `OutputWriter::new`.
    Sink(odb::Sink),
}
99
100impl gix::objs::Write for OutputWriter {
101    fn write_buf(&self, kind: object::Kind, from: &[u8]) -> Result<ObjectId, gix::objs::write::Error> {
102        match self {
103            OutputWriter::Loose(db) => db.write_buf(kind, from),
104            OutputWriter::Sink(db) => db.write_buf(kind, from),
105        }
106    }
107
108    fn write_stream(
109        &self,
110        kind: object::Kind,
111        size: u64,
112        from: &mut dyn Read,
113    ) -> Result<ObjectId, gix::objs::write::Error> {
114        match self {
115            OutputWriter::Loose(db) => db.write_stream(kind, size, from),
116            OutputWriter::Sink(db) => db.write_stream(kind, size, from),
117        }
118    }
119}
120
121impl OutputWriter {
122    fn new(path: Option<impl AsRef<Path>>, compress: bool, object_hash: gix::hash::Kind) -> Self {
123        match path {
124            Some(path) => OutputWriter::Loose(loose::Store::at(path.as_ref(), object_hash)),
125            None => OutputWriter::Sink(odb::sink(object_hash).compress(compress)),
126        }
127    }
128}
129
/// Configuration for [`pack_or_pack_index()`].
#[derive(Default)]
pub struct Context {
    /// The number of threads to use during traversal; `None` leaves the choice to the implementation.
    pub thread_limit: Option<usize>,
    /// If `true`, delete the source `.idx` and `.pack` files after a successful explode.
    pub delete_pack: bool,
    /// If `true`, compress objects even when they are written to the sink (no object path given);
    /// also influences which traversal algorithm is picked.
    pub sink_compress: bool,
    /// If `true`, re-read and checksum every object right after writing it to the loose store.
    pub verify: bool,
    /// Set to `true` from another thread to abort the traversal.
    pub should_interrupt: Arc<AtomicBool>,
    /// The kind of object hash used in the pack and the output database.
    pub object_hash: gix::hash::Kind,
}
139
140pub fn pack_or_pack_index(
141    pack_path: impl AsRef<Path>,
142    object_path: Option<impl AsRef<Path>>,
143    check: SafetyCheck,
144    mut progress: impl NestedProgress + 'static,
145    Context {
146        thread_limit,
147        delete_pack,
148        sink_compress,
149        verify,
150        should_interrupt,
151        object_hash,
152    }: Context,
153) -> Result<()> {
154    use anyhow::Context;
155
156    let path = pack_path.as_ref();
157    let bundle = pack::Bundle::at(path, object_hash).with_context(|| {
158        format!(
159            "Could not find .idx or .pack file from given file at '{}'",
160            path.display()
161        )
162    })?;
163
164    if !object_path.as_ref().is_none_or(|p| p.as_ref().is_dir()) {
165        return Err(anyhow!(
166            "The object directory at '{}' is inaccessible",
167            object_path
168                .expect("path present if no directory on disk")
169                .as_ref()
170                .display()
171        ));
172    }
173
174    let algorithm = object_path.as_ref().map_or_else(
175        || {
176            if sink_compress {
177                pack::index::traverse::Algorithm::Lookup
178            } else {
179                pack::index::traverse::Algorithm::DeltaTreeLookup
180            }
181        },
182        |_| pack::index::traverse::Algorithm::Lookup,
183    );
184
185    let pack::index::traverse::Outcome { .. } = bundle
186        .index
187        .traverse(
188            &bundle.pack,
189            &mut progress,
190            &should_interrupt,
191            {
192                let object_path = object_path.map(|p| p.as_ref().to_owned());
193                let out = OutputWriter::new(object_path.clone(), sink_compress, object_hash);
194                let loose_odb = verify
195                    .then(|| object_path.as_ref().map(|path| loose::Store::at(path, object_hash)))
196                    .flatten();
197                let mut read_buf = Vec::new();
198                move |object_kind, buf, index_entry, progress| {
199                    let written_id = out.write_buf(object_kind, buf).map_err(|err| Error::Write {
200                        source: err,
201                        kind: object_kind,
202                        id: index_entry.oid,
203                    })?;
204                    if let Err(err) = written_id.verify(&index_entry.oid) {
205                        if let object::Kind::Tree = object_kind {
206                            progress.info(format!(
207                                "The tree in pack named {} was written as {} due to modes 100664 and 100640 rewritten as 100644.",
208                                index_entry.oid, written_id
209                            ));
210                        } else {
211                            return Err(Error::ObjectEncodeMismatch {
212                                source: err,
213                                kind: object_kind,
214                            });
215                        }
216                    }
217                    if let Some(verifier) = loose_odb.as_ref() {
218                        let obj = verifier
219                            .try_find(&written_id, &mut read_buf)
220                            .map_err(|err| Error::WrittenFileCorrupt {
221                                source: err,
222                                id: written_id,
223                            })?
224                            .ok_or(Error::WrittenFileMissing { id: written_id })?;
225                        obj.verify_checksum(&written_id)?;
226                    }
227                    Ok(())
228                }
229            },
230            pack::index::traverse::Options {
231                traversal: algorithm,
232                thread_limit,
233                check: check.into(),
234                make_pack_lookup_cache: pack::cache::lru::StaticLinkedList::<64>::default,
235            },
236        )
237        .with_context(|| "Failed to explode the entire pack - some loose objects may have been created nonetheless")?;
238
239    let (index_path, data_path) = (bundle.index.path().to_owned(), bundle.pack.path().to_owned());
240    drop(bundle);
241
242    if delete_pack {
243        fs::remove_file(&index_path)
244            .and_then(|_| fs::remove_file(&data_path))
245            .with_context(|| {
246                format!(
247                    "Failed to delete pack index file at '{} or data file at '{}'",
248                    index_path.display(),
249                    data_path.display()
250                )
251            })?;
252        progress.info(format!(
253            "Removed '{}' and '{}'",
254            index_path.display(),
255            data_path.display()
256        ));
257    }
258    Ok(())
259}