Skip to main content

gitoxide_core/pack/
verify.rs

1use std::{ffi::OsStr, io, path::Path, str::FromStr, sync::atomic::AtomicBool};
2
3use anyhow::{Context as AnyhowContext, Result, anyhow};
4use bytesize::ByteSize;
5use gix::{
6    NestedProgress, object, odb,
7    odb::{pack, pack::index},
8};
9pub use index::verify::Mode;
/// The inclusive range `1..=2`.
///
/// NOTE(review): not used within this file — presumably the progress nesting levels
/// callers are expected to display; confirm against the call sites.
pub const PROGRESS_RANGE: std::ops::RangeInclusive<u8> = std::ops::RangeInclusive::new(1, 2);
11
12use crate::OutputFormat;
13
/// The strategy to use when traversing a pack during verification.
///
/// Each variant is converted into an `index::traverse::Algorithm` through the `From`
/// implementation in this file, and can be parsed from its kebab-case name via `FromStr`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Algorithm {
    /// Converted to `index::traverse::Algorithm::DeltaTreeLookup`.
    LessTime,
    /// Converted to `index::traverse::Algorithm::Lookup`.
    LessMemory,
}
19
20impl Algorithm {
21    pub fn variants() -> &'static [&'static str] {
22        &["less-time", "less-memory"]
23    }
24}
25
26impl FromStr for Algorithm {
27    type Err = String;
28
29    fn from_str(s: &str) -> Result<Self, Self::Err> {
30        let s_lc = s.to_ascii_lowercase();
31        Ok(match s_lc.as_str() {
32            "less-memory" => Algorithm::LessMemory,
33            "less-time" => Algorithm::LessTime,
34            _ => return Err(format!("Invalid verification algorithm: '{s}'")),
35        })
36    }
37}
38
39impl From<Algorithm> for index::traverse::Algorithm {
40    fn from(v: Algorithm) -> Self {
41        match v {
42            Algorithm::LessMemory => index::traverse::Algorithm::Lookup,
43            Algorithm::LessTime => index::traverse::Algorithm::DeltaTreeLookup,
44        }
45    }
46}
47
48/// A general purpose context for many operations provided here
49pub struct Context<'a, W1: io::Write, W2: io::Write> {
50    /// If set, provide statistics to `out` in the given format
51    pub output_statistics: Option<OutputFormat>,
52    /// A stream to which to output operation results
53    pub out: W1,
54    /// A stream to which to errors
55    pub err: W2,
56    /// If set, don't use more than this amount of threads.
57    /// Otherwise, usually use as many threads as there are logical cores.
58    /// A value of 0 is interpreted as no-limit
59    pub thread_limit: Option<usize>,
60    pub mode: index::verify::Mode,
61    pub algorithm: Algorithm,
62    pub should_interrupt: &'a AtomicBool,
63    pub object_hash: gix::hash::Kind,
64}
65
66enum EitherCache<const SIZE: usize> {
67    Left(pack::cache::Never),
68    Right(pack::cache::lru::StaticLinkedList<SIZE>),
69}
70
71impl<const SIZE: usize> pack::cache::DecodeEntry for EitherCache<SIZE> {
72    fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: object::Kind, compressed_size: usize) {
73        match self {
74            EitherCache::Left(v) => v.put(pack_id, offset, data, kind, compressed_size),
75            EitherCache::Right(v) => v.put(pack_id, offset, data, kind, compressed_size),
76        }
77    }
78
79    fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(object::Kind, usize)> {
80        match self {
81            EitherCache::Left(v) => v.get(pack_id, offset, out),
82            EitherCache::Right(v) => v.get(pack_id, offset, out),
83        }
84    }
85}
86
87pub fn pack_or_pack_index<W1, W2>(
88    path: impl AsRef<Path>,
89    mut progress: impl NestedProgress + 'static,
90    Context {
91        mut out,
92        mut err,
93        mode,
94        output_statistics,
95        thread_limit,
96        algorithm,
97        should_interrupt,
98        object_hash,
99    }: Context<'_, W1, W2>,
100) -> Result<()>
101where
102    W1: io::Write,
103    W2: io::Write,
104{
105    let path = path.as_ref();
106    let ext = path.extension().and_then(OsStr::to_str).unwrap_or("");
107    const CACHE_SIZE: usize = 64;
108    let cache = || -> EitherCache<CACHE_SIZE> {
109        if matches!(algorithm, Algorithm::LessMemory) {
110            if output_statistics.is_some() {
111                // turn off acceleration as we need to see entire chains all the time
112                EitherCache::Left(pack::cache::Never)
113            } else {
114                EitherCache::Right(pack::cache::lru::StaticLinkedList::<CACHE_SIZE>::default())
115            }
116        } else {
117            EitherCache::Left(pack::cache::Never)
118        }
119    };
120    let res = match ext {
121        "pack" => {
122            let pack = odb::pack::data::File::at(path, object_hash).with_context(|| "Could not open pack file")?;
123            pack.verify_checksum(&mut progress.add_child("Sha1 of pack"), should_interrupt)
124                .map(|id| (id, None))?
125        }
126        "idx" => {
127            let idx =
128                odb::pack::index::File::at(path, object_hash).with_context(|| "Could not open pack index file")?;
129            let packfile_path = path.with_extension("pack");
130            let pack = odb::pack::data::File::at(&packfile_path, object_hash)
131                .map_err(|e| {
132                    writeln!(
133                        err,
134                        "Could not find matching pack file at '{}' - only index file will be verified, error was: {}",
135                        packfile_path.display(),
136                        e
137                    )
138                    .ok();
139                    e
140                })
141                .ok();
142
143            idx.verify_integrity(
144                pack.as_ref().map(|p| gix::odb::pack::index::verify::PackContext {
145                    data: p,
146                    options: gix::odb::pack::index::verify::integrity::Options {
147                        verify_mode: mode,
148                        traversal: algorithm.into(),
149                        make_pack_lookup_cache: cache,
150                        thread_limit,
151                    },
152                }),
153                &mut progress,
154                should_interrupt,
155            )
156            .map(|o| (o.actual_index_checksum, o.pack_traverse_statistics))
157            .with_context(|| "Verification failure")?
158        }
159        "" => match path.file_name() {
160            Some(file_name) if file_name == "multi-pack-index" => {
161                let multi_index = gix::odb::pack::multi_index::File::at(path, None)?;
162                let res = multi_index.verify_integrity(
163                    &mut progress,
164                    should_interrupt,
165                    gix::odb::pack::index::verify::integrity::Options {
166                        verify_mode: mode,
167                        traversal: algorithm.into(),
168                        thread_limit,
169                        make_pack_lookup_cache: cache,
170                    },
171                )?;
172                match output_statistics {
173                    Some(OutputFormat::Human) => {
174                        for (index_name, stats) in multi_index.index_names().iter().zip(res.pack_traverse_statistics) {
175                            writeln!(out, "{}", index_name.display()).ok();
176                            drop(print_statistics(&mut out, &stats));
177                        }
178                    }
179                    #[cfg(feature = "serde")]
180                    Some(OutputFormat::Json) => serde_json::to_writer_pretty(
181                        out,
182                        &multi_index
183                            .index_names()
184                            .iter()
185                            .zip(res.pack_traverse_statistics)
186                            .collect::<Vec<_>>(),
187                    )?,
188                    _ => {}
189                }
190                return Ok(());
191            }
192            _ => {
193                return Err(anyhow!(
194                    "Cannot determine data type on path without extension '{}', expecting default extensions 'idx' and 'pack'",
195                    path.display()
196                ));
197            }
198        },
199        ext => return Err(anyhow!("Unknown extension {ext:?}, expecting 'idx' or 'pack'")),
200    };
201    if let Some(stats) = res.1.as_ref() {
202        #[cfg_attr(not(feature = "serde"), allow(clippy::single_match))]
203        match output_statistics {
204            Some(OutputFormat::Human) => drop(print_statistics(&mut out, stats)),
205            #[cfg(feature = "serde")]
206            Some(OutputFormat::Json) => serde_json::to_writer_pretty(out, stats)?,
207            _ => {}
208        }
209    }
210    Ok(())
211}
212
213fn print_statistics(out: &mut impl io::Write, stats: &index::traverse::Statistics) -> io::Result<()> {
214    writeln!(out, "objects per delta chain length")?;
215    let mut chain_length_to_object: Vec<_> = stats.objects_per_chain_length.iter().map(|(a, b)| (*a, *b)).collect();
216    chain_length_to_object.sort_by_key(|e| e.0);
217    let mut total_object_count = 0;
218    for (chain_length, object_count) in chain_length_to_object.into_iter() {
219        total_object_count += object_count;
220        writeln!(out, "\t{chain_length:>2}: {object_count}")?;
221    }
222    writeln!(out, "\t->: {total_object_count}")?;
223
224    let pack::data::decode::entry::Outcome {
225        kind: _,
226        num_deltas,
227        decompressed_size,
228        compressed_size,
229        object_size,
230    } = stats.average;
231
232    let width = 30;
233    writeln!(out, "\naverages")?;
234    #[rustfmt::skip]
235    writeln!(
236        out,
237        "\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};",
238        "delta chain length:", num_deltas,
239        "decompressed entry [B]:", decompressed_size,
240        "compressed entry [B]:", compressed_size,
241        "decompressed object size [B]:", object_size,
242        width = width
243    )?;
244
245    writeln!(out, "\ncompression")?;
246    #[rustfmt::skip]
247    writeln!(
248        out, "\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
249        "compressed entries size", ByteSize(stats.total_compressed_entries_size).display().si(),
250        "decompressed entries size", ByteSize(stats.total_decompressed_entries_size).display().si(),
251        "total object size", ByteSize(stats.total_object_size).display().si(),
252        "pack size", ByteSize(stats.pack_size).display().si(),
253        width = width
254    )?;
255    #[rustfmt::skip]
256    writeln!(
257        out,
258        "\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
259        "num trees", stats.num_trees,
260        "num blobs", stats.num_blobs,
261        "num commits", stats.num_commits,
262        "num tags", stats.num_tags,
263        width = width
264    )?;
265    let compression_ratio = stats.total_decompressed_entries_size as f64 / stats.total_compressed_entries_size as f64;
266    let delta_compression_ratio = stats.total_object_size as f64 / stats.total_compressed_entries_size as f64;
267    #[rustfmt::skip]
268    writeln!(
269        out,
270        "\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.3}%",
271        "compression ratio", compression_ratio,
272        "delta compression ratio", delta_compression_ratio,
273        "delta gain", delta_compression_ratio / compression_ratio,
274        "pack overhead", (1.0 - (stats.total_compressed_entries_size as f64 / stats.pack_size as f64)) * 100.0,
275        width = width
276    )?;
277    Ok(())
278}