gitoxide_core/pack/verify.rs

use std::{ffi::OsStr, io, path::Path, str::FromStr, sync::atomic::AtomicBool};

use anyhow::{anyhow, Context as AnyhowContext, Result};
use bytesize::ByteSize;
use gix::{
    object, odb,
    odb::{pack, pack::index},
    NestedProgress,
};
pub use index::verify::Mode;
pub const PROGRESS_RANGE: std::ops::RangeInclusive<u8> = 1..=2;

use crate::OutputFormat;

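/// The verification algorithm to use, trading time against memory.
///
/// # Example (sketch)
///
/// A hedged illustration of parsing a user-supplied name; the crate path is assumed.
///
/// ```ignore
/// use std::str::FromStr;
/// use gitoxide_core::pack::verify::Algorithm;
///
/// // Accepted names are listed by `Algorithm::variants()`.
/// let algorithm = Algorithm::from_str("less-memory").expect("a known algorithm name");
/// assert_eq!(algorithm, Algorithm::LessMemory);
/// ```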
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
pub enum Algorithm {
    LessTime,
    LessMemory,
}

impl Algorithm {
    pub fn variants() -> &'static [&'static str] {
        &["less-time", "less-memory"]
    }
}

impl FromStr for Algorithm {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let s_lc = s.to_ascii_lowercase();
        Ok(match s_lc.as_str() {
            "less-memory" => Algorithm::LessMemory,
            "less-time" => Algorithm::LessTime,
            _ => return Err(format!("Invalid verification algorithm: '{s}'")),
        })
    }
}

impl From<Algorithm> for index::traverse::Algorithm {
    fn from(v: Algorithm) -> Self {
        match v {
            Algorithm::LessMemory => index::traverse::Algorithm::Lookup,
            Algorithm::LessTime => index::traverse::Algorithm::DeltaTreeLookup,
        }
    }
}

/// A general-purpose context for many of the operations provided here.
pub struct Context<'a, W1: io::Write, W2: io::Write> {
    /// If set, provide statistics to `out` in the given format
    pub output_statistics: Option<OutputFormat>,
    /// A stream to which to output operation results
    pub out: W1,
    /// A stream to which to output errors
    pub err: W2,
    /// If set, don't use more than this number of threads.
    /// Otherwise, usually use as many threads as there are logical cores.
    /// A value of 0 is interpreted as no limit.
    pub thread_limit: Option<usize>,
    /// The level of verification to apply to each pack entry.
    pub mode: index::verify::Mode,
    /// Whether to trade time or memory during traversal.
    pub algorithm: Algorithm,
    /// A flag which, once set, gracefully interrupts the operation.
    pub should_interrupt: &'a AtomicBool,
    /// The kind of hash to expect for objects in the pack.
    pub object_hash: gix::hash::Kind,
}

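/// Dispatch statically between 'no caching' and a small LRU cache, avoiding a boxed trait object.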
enum EitherCache<const SIZE: usize> {
    Left(pack::cache::Never),
    Right(pack::cache::lru::StaticLinkedList<SIZE>),
}

impl<const SIZE: usize> pack::cache::DecodeEntry for EitherCache<SIZE> {
    fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: object::Kind, compressed_size: usize) {
        match self {
            EitherCache::Left(v) => v.put(pack_id, offset, data, kind, compressed_size),
            EitherCache::Right(v) => v.put(pack_id, offset, data, kind, compressed_size),
        }
    }

    fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(object::Kind, usize)> {
        match self {
            EitherCache::Left(v) => v.get(pack_id, offset, out),
            EitherCache::Right(v) => v.get(pack_id, offset, out),
        }
    }
}

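/// Verify the file at `path`, which may be a pack (`*.pack`), a pack index (`*.idx`), or a file
/// named `multi-pack-index`, writing results and optional statistics to the streams in the context.
///
/// # Example (sketch)
///
/// A hedged sketch of a call site; the path, progress implementation and option values are
/// illustrative assumptions, not the only valid choices.
///
/// ```ignore
/// use std::sync::atomic::AtomicBool;
///
/// let should_interrupt = AtomicBool::new(false);
/// pack_or_pack_index(
///     "objects/pack/pack-1234.idx", // hypothetical path
///     gix::progress::Discard,
///     Context {
///         output_statistics: Some(OutputFormat::Human),
///         out: std::io::stdout(),
///         err: std::io::stderr(),
///         thread_limit: None,
///         mode: Mode::HashCrc32, // variant name assumed; pick any verification mode
///         algorithm: Algorithm::LessTime,
///         should_interrupt: &should_interrupt,
///         object_hash: gix::hash::Kind::Sha1,
///     },
/// )?;
/// ```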
pub fn pack_or_pack_index<W1, W2>(
    path: impl AsRef<Path>,
    mut progress: impl NestedProgress + 'static,
    Context {
        mut out,
        mut err,
        mode,
        output_statistics,
        thread_limit,
        algorithm,
        should_interrupt,
        object_hash,
    }: Context<'_, W1, W2>,
) -> Result<()>
where
    W1: io::Write,
    W2: io::Write,
{
    let path = path.as_ref();
    let ext = path.extension().and_then(OsStr::to_str).unwrap_or("");
    const CACHE_SIZE: usize = 64;
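    // A lookup cache only pays off for the 'less-memory' (lookup) traversal; it is disabled when
    // statistics are requested so that entire delta chains are decoded and counted every time.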
    let cache = || -> EitherCache<CACHE_SIZE> {
        if matches!(algorithm, Algorithm::LessMemory) {
            if output_statistics.is_some() {
                // turn off acceleration as we need to see entire chains all the time
                EitherCache::Left(pack::cache::Never)
            } else {
                EitherCache::Right(pack::cache::lru::StaticLinkedList::<CACHE_SIZE>::default())
            }
        } else {
            EitherCache::Left(pack::cache::Never)
        }
    };
    let res = match ext {
        "pack" => {
            let pack = odb::pack::data::File::at(path, object_hash).with_context(|| "Could not open pack file")?;
            pack.verify_checksum(&mut progress.add_child("Sha1 of pack"), should_interrupt)
                .map(|id| (id, None))?
        }
        "idx" => {
            let idx =
                odb::pack::index::File::at(path, object_hash).with_context(|| "Could not open pack index file")?;
            let packfile_path = path.with_extension("pack");
            let pack = odb::pack::data::File::at(&packfile_path, object_hash)
                .map_err(|e| {
                    writeln!(
                        err,
                        "Could not find matching pack file at '{}' - only index file will be verified, error was: {}",
                        packfile_path.display(),
                        e
                    )
                    .ok();
                    e
                })
                .ok();

            idx.verify_integrity(
                pack.as_ref().map(|p| gix::odb::pack::index::verify::PackContext {
                    data: p,
                    options: gix::odb::pack::index::verify::integrity::Options {
                        verify_mode: mode,
                        traversal: algorithm.into(),
                        make_pack_lookup_cache: cache,
                        thread_limit,
                    },
                }),
                &mut progress,
                should_interrupt,
            )
            .map(|o| (o.actual_index_checksum, o.pack_traverse_statistics))
            .with_context(|| "Verification failure")?
        }
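        // Without a recognized extension, only a file named 'multi-pack-index' is accepted; it
        // verifies all of the indices and packs it refers to.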
        "" => {
            match path.file_name() {
                Some(file_name) if file_name == "multi-pack-index" => {
                    let multi_index = gix::odb::pack::multi_index::File::at(path)?;
                    let res = multi_index.verify_integrity(
                        &mut progress,
                        should_interrupt,
                        gix::odb::pack::index::verify::integrity::Options {
                            verify_mode: mode,
                            traversal: algorithm.into(),
                            thread_limit,
                            make_pack_lookup_cache: cache,
                        },
                    )?;
                    match output_statistics {
                        Some(OutputFormat::Human) => {
                            for (index_name, stats) in multi_index.index_names().iter().zip(res.pack_traverse_statistics) {
                                writeln!(out, "{}", index_name.display()).ok();
                                drop(print_statistics(&mut out, &stats));
                            }
                        },
                        #[cfg(feature = "serde")]
                        Some(OutputFormat::Json) => serde_json::to_writer_pretty(
                            out,
                            &multi_index
                                .index_names()
                                .iter()
                                .zip(res.pack_traverse_statistics)
                                .collect::<Vec<_>>(),
                        )?,
                        _ => {}
                    }
                    return Ok(())
                },
                _ => return Err(anyhow!(
                        "Cannot determine data type on path without extension '{}', expecting default extensions 'idx' and 'pack'",
                        path.display()
                    ))
            }
        }
        ext => return Err(anyhow!("Unknown extension {:?}, expecting 'idx' or 'pack'", ext)),
    };
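    // Traversal statistics are only present if a pack was actually traversed alongside the index.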
    if let Some(stats) = res.1.as_ref() {
        #[cfg_attr(not(feature = "serde"), allow(clippy::single_match))]
        match output_statistics {
            Some(OutputFormat::Human) => drop(print_statistics(&mut out, stats)),
            #[cfg(feature = "serde")]
            Some(OutputFormat::Json) => serde_json::to_writer_pretty(out, stats)?,
            _ => {}
        }
    }
    Ok(())
}

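/// Write a human-readable rendition of the pack traversal `stats` to `out`.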
fn print_statistics(out: &mut impl io::Write, stats: &index::traverse::Statistics) -> io::Result<()> {
    writeln!(out, "objects per delta chain length")?;
    let mut chain_length_to_object: Vec<_> = stats.objects_per_chain_length.iter().map(|(a, b)| (*a, *b)).collect();
    chain_length_to_object.sort_by_key(|e| e.0);
    let mut total_object_count = 0;
    for (chain_length, object_count) in chain_length_to_object.into_iter() {
        total_object_count += object_count;
        writeln!(out, "\t{chain_length:>2}: {object_count}")?;
    }
    writeln!(out, "\t->: {total_object_count}")?;

    let pack::data::decode::entry::Outcome {
        kind: _,
        num_deltas,
        decompressed_size,
        compressed_size,
        object_size,
    } = stats.average;

    let width = 30;
    writeln!(out, "\naverages")?;
    #[rustfmt::skip]
    writeln!(
        out,
        "\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};",
        "delta chain length:", num_deltas,
        "decompressed entry [B]:", decompressed_size,
        "compressed entry [B]:", compressed_size,
        "decompressed object size [B]:", object_size,
        width = width
    )?;

    writeln!(out, "\ncompression")?;
    #[rustfmt::skip]
    writeln!(
        out, "\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
        "compressed entries size", ByteSize(stats.total_compressed_entries_size).display().si(),
        "decompressed entries size", ByteSize(stats.total_decompressed_entries_size).display().si(),
        "total object size", ByteSize(stats.total_object_size).display().si(),
        "pack size", ByteSize(stats.pack_size).display().si(),
        width = width
    )?;
    #[rustfmt::skip]
    writeln!(
        out,
        "\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
        "num trees", stats.num_trees,
        "num blobs", stats.num_blobs,
        "num commits", stats.num_commits,
        "num tags", stats.num_tags,
        width = width
    )?;
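    // Relate on-disk (compressed) sizes to their decompressed and fully delta-resolved counterparts.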
    let compression_ratio = stats.total_decompressed_entries_size as f64 / stats.total_compressed_entries_size as f64;
    let delta_compression_ratio = stats.total_object_size as f64 / stats.total_compressed_entries_size as f64;
    #[rustfmt::skip]
    writeln!(
        out,
        "\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.3}%",
        "compression ratio", compression_ratio,
        "delta compression ratio", delta_compression_ratio,
        "delta gain", delta_compression_ratio / compression_ratio,
        "pack overhead", (1.0 - (stats.total_compressed_entries_size as f64 / stats.pack_size as f64)) * 100.0,
        width = width
    )?;
    Ok(())
}