1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
use crate::OutputFormat;
use anyhow::{anyhow, Context as AnyhowContext, Result};
use bytesize::ByteSize;
use git_features::progress::{self, Progress};
use git_object::{owned, Kind};
use git_odb::pack::{self, index};
use std::{io, path::Path, str::FromStr};

pub use index::verify::Mode;

#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
pub enum Algorithm {
    LessTime,
    LessMemory,
}

impl Algorithm {
    pub fn variants() -> &'static [&'static str] {
        &["less-time", "less-memory"]
    }
}

impl FromStr for Algorithm {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let s_lc = s.to_ascii_lowercase();
        Ok(match s_lc.as_str() {
            "less-memory" => Algorithm::LessMemory,
            "less-time" => Algorithm::LessTime,
            _ => return Err(format!("Invalid verification algorithm: '{}'", s)),
        })
    }
}

impl From<Algorithm> for index::traverse::Algorithm {
    fn from(v: Algorithm) -> Self {
        match v {
            Algorithm::LessMemory => index::traverse::Algorithm::Lookup,
            Algorithm::LessTime => index::traverse::Algorithm::DeltaTreeLookup,
        }
    }
}

/// A general purpose context for many operations provided here
pub struct Context<W1: io::Write, W2: io::Write> {
    /// If set, provide statistics to `out` in the given format
    pub output_statistics: Option<OutputFormat>,
    /// A stream to which to output operation results
    pub out: W1,
    /// A stream to which to errors
    pub err: W2,
    /// If set, don't use more than this amount of threads.
    /// Otherwise, usually use as many threads as there are logical cores.
    /// A value of 0 is interpreted as no-limit
    pub thread_limit: Option<usize>,
    pub mode: index::verify::Mode,
    pub algorithm: Algorithm,
}

impl Default for Context<Vec<u8>, Vec<u8>> {
    fn default() -> Self {
        Context {
            output_statistics: None,
            thread_limit: None,
            mode: index::verify::Mode::Sha1CRC32,
            algorithm: Algorithm::LessMemory,
            out: Vec::new(),
            err: Vec::new(),
        }
    }
}

#[allow(clippy::large_enum_variant)]
enum EitherCache {
    Left(pack::cache::DecodeEntryNoop),
    Right(pack::cache::DecodeEntryLRU),
}

impl pack::cache::DecodeEntry for EitherCache {
    fn put(&mut self, offset: u64, data: &[u8], kind: Kind, compressed_size: usize) {
        match self {
            EitherCache::Left(v) => v.put(offset, data, kind, compressed_size),
            EitherCache::Right(v) => v.put(offset, data, kind, compressed_size),
        }
    }

    fn get(&mut self, offset: u64, out: &mut Vec<u8>) -> Option<(Kind, usize)> {
        match self {
            EitherCache::Left(v) => v.get(offset, out),
            EitherCache::Right(v) => v.get(offset, out),
        }
    }
}

pub fn pack_or_pack_index<P, W1, W2>(
    path: impl AsRef<Path>,
    progress: Option<P>,
    Context {
        mut out,
        mut err,
        mode,
        output_statistics,
        thread_limit,
        algorithm,
    }: Context<W1, W2>,
) -> Result<(owned::Id, Option<index::traverse::Outcome>)>
where
    P: Progress + Send,
    <P as Progress>::SubProgress: Send,
    W1: io::Write,
    W2: io::Write,
    <<P as Progress>::SubProgress as Progress>::SubProgress: Send,
{
    let path = path.as_ref();
    let ext = path.extension().and_then(|ext| ext.to_str()).ok_or_else(|| {
        anyhow!(
            "Cannot determine data type on path without extension '{}', expecting default extensions 'idx' and 'pack'",
            path.display()
        )
    })?;
    let res = match ext {
        "pack" => {
            let pack = git_odb::pack::data::File::at(path).with_context(|| "Could not open pack file")?;
            pack.verify_checksum(progress::DoOrDiscard::from(progress).add_child("Sha1 of pack"))
                .map(|id| (id, None))?
        }
        "idx" => {
            let idx = git_odb::pack::index::File::at(path).with_context(|| "Could not open pack index file")?;
            let packfile_path = path.with_extension("pack");
            let pack = git_odb::pack::data::File::at(&packfile_path)
                .map_err(|e| {
                    writeln!(
                        err,
                        "Could not find matching pack file at '{}' - only index file will be verified, error was: {}",
                        packfile_path.display(),
                        e
                    )
                    .ok();
                    e
                })
                .ok();
            let cache = || -> EitherCache {
                if output_statistics.is_some() {
                    // turn off acceleration as we need to see entire chains all the time
                    EitherCache::Left(pack::cache::DecodeEntryNoop)
                } else {
                    EitherCache::Right(pack::cache::DecodeEntryLRU::default())
                }
            };

            idx.verify_integrity(
                pack.as_ref().map(|p| (p, mode, algorithm.into())),
                thread_limit,
                progress,
                cache,
            )
            .map(|(a, b, _)| (a, b))
            .with_context(|| "Verification failure")?
        }
        ext => return Err(anyhow!("Unknown extension {:?}, expecting 'idx' or 'pack'", ext)),
    };
    if let Some(stats) = res.1.as_ref() {
        match output_statistics {
            Some(OutputFormat::Human) => drop(print_statistics(&mut out, stats)),
            #[cfg(feature = "serde1")]
            Some(OutputFormat::Json) => serde_json::to_writer_pretty(out, stats)?,
            _ => {}
        };
    }
    Ok(res)
}

fn print_statistics(out: &mut impl io::Write, stats: &index::traverse::Outcome) -> io::Result<()> {
    writeln!(out, "objects per delta chain length")?;
    let mut chain_length_to_object: Vec<_> = stats.objects_per_chain_length.iter().map(|(a, b)| (*a, *b)).collect();
    chain_length_to_object.sort_by_key(|e| e.0);
    let mut total_object_count = 0;
    for (chain_length, object_count) in chain_length_to_object.into_iter() {
        total_object_count += object_count;
        writeln!(out, "\t{:>2}: {}", chain_length, object_count)?;
    }
    writeln!(out, "\t->: {}", total_object_count)?;

    let pack::data::decode::Outcome {
        kind: _,
        num_deltas,
        decompressed_size,
        compressed_size,
        object_size,
    } = stats.average;

    let width = 30;
    writeln!(out, "\naverages")?;
    #[rustfmt::skip]
    writeln!(
        out,
        "\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};",
        "delta chain length:", num_deltas,
        "decompressed entry [B]:", decompressed_size,
        "compressed entry [B]:", compressed_size,
        "decompressed object size [B]:", object_size,
        width = width
    )?;

    writeln!(out, "\ncompression")?;
    #[rustfmt::skip]
    writeln!(
        out, "\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
        "compressed entries size", ByteSize(stats.total_compressed_entries_size),
        "decompressed entries size", ByteSize(stats.total_decompressed_entries_size),
        "total object size", ByteSize(stats.total_object_size),
        "pack size", ByteSize(stats.pack_size),
        width = width
    )?;
    #[rustfmt::skip]
    writeln!(
        out,
        "\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
        "num trees", stats.num_trees,
        "num blobs", stats.num_blobs,
        "num commits", stats.num_commits,
        "num tags", stats.num_tags,
        width = width
    )?;
    let compression_ratio = stats.total_decompressed_entries_size as f64 / stats.total_compressed_entries_size as f64;
    let delta_compression_ratio = stats.total_object_size as f64 / stats.total_compressed_entries_size as f64;
    #[rustfmt::skip]
    writeln!(
        out,
        "\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.3}%",
        "compression ratio", compression_ratio,
        "delta compression ratio", delta_compression_ratio,
        "delta gain", delta_compression_ratio / compression_ratio,
        "pack overhead", (1.0 - (stats.total_compressed_entries_size as f64 / stats.pack_size as f64)) * 100.0,
        width = width
    )?;
    Ok(())
}