1use std::{ffi::OsStr, io, path::Path, str::FromStr, sync::atomic::AtomicBool};
2
3use anyhow::{Context as AnyhowContext, Result, anyhow};
4use bytesize::ByteSize;
5use gix::{
6 NestedProgress, object, odb,
7 odb::{pack, pack::index},
8};
9pub use index::verify::Mode;
/// The inclusive range `1..=2`, exported for users of this module.
///
/// NOTE(review): presumably the levels of the progress tree worth displaying while verifying — confirm
/// against the callers, as nothing in this file reads it.
pub const PROGRESS_RANGE: std::ops::RangeInclusive<u8> = 1..=2;
11
12use crate::OutputFormat;
13
/// The traversal strategy to use when verifying a pack.
///
/// As the variant names suggest, the choice trades run time against memory use. Each variant maps onto
/// one `index::traverse::Algorithm` via the `From` implementation in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Algorithm {
    /// Spelled `less-time` in textual form.
    LessTime,
    /// Spelled `less-memory` in textual form.
    LessMemory,
}

impl Algorithm {
    /// The textual names accepted by [`FromStr`], in declaration order of the variants.
    const NAMES: &'static [&'static str] = &["less-time", "less-memory"];

    /// Return the textual names of all variants.
    pub fn variants() -> &'static [&'static str] {
        Self::NAMES
    }
}

impl FromStr for Algorithm {
    type Err = String;

    /// Parse `s` into an [`Algorithm`], ignoring ASCII case.
    ///
    /// # Errors
    ///
    /// Returns a message quoting `s` verbatim if it names no known algorithm.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s.eq_ignore_ascii_case("less-memory") {
            Ok(Self::LessMemory)
        } else if s.eq_ignore_ascii_case("less-time") {
            Ok(Self::LessTime)
        } else {
            Err(format!("Invalid verification algorithm: '{s}'"))
        }
    }
}
38
39impl From<Algorithm> for index::traverse::Algorithm {
40 fn from(v: Algorithm) -> Self {
41 match v {
42 Algorithm::LessMemory => index::traverse::Algorithm::Lookup,
43 Algorithm::LessTime => index::traverse::Algorithm::DeltaTreeLookup,
44 }
45 }
46}
47
48pub struct Context<'a, W1: io::Write, W2: io::Write> {
50 pub output_statistics: Option<OutputFormat>,
52 pub out: W1,
54 pub err: W2,
56 pub thread_limit: Option<usize>,
60 pub mode: index::verify::Mode,
61 pub algorithm: Algorithm,
62 pub should_interrupt: &'a AtomicBool,
63 pub object_hash: gix::hash::Kind,
64}
65
66enum EitherCache<const SIZE: usize> {
67 Left(pack::cache::Never),
68 Right(pack::cache::lru::StaticLinkedList<SIZE>),
69}
70
71impl<const SIZE: usize> pack::cache::DecodeEntry for EitherCache<SIZE> {
72 fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: object::Kind, compressed_size: usize) {
73 match self {
74 EitherCache::Left(v) => v.put(pack_id, offset, data, kind, compressed_size),
75 EitherCache::Right(v) => v.put(pack_id, offset, data, kind, compressed_size),
76 }
77 }
78
79 fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(object::Kind, usize)> {
80 match self {
81 EitherCache::Left(v) => v.get(pack_id, offset, out),
82 EitherCache::Right(v) => v.get(pack_id, offset, out),
83 }
84 }
85}
86
87pub fn pack_or_pack_index<W1, W2>(
88 path: impl AsRef<Path>,
89 mut progress: impl NestedProgress + 'static,
90 Context {
91 mut out,
92 mut err,
93 mode,
94 output_statistics,
95 thread_limit,
96 algorithm,
97 should_interrupt,
98 object_hash,
99 }: Context<'_, W1, W2>,
100) -> Result<()>
101where
102 W1: io::Write,
103 W2: io::Write,
104{
105 let path = path.as_ref();
106 let ext = path.extension().and_then(OsStr::to_str).unwrap_or("");
107 const CACHE_SIZE: usize = 64;
108 let cache = || -> EitherCache<CACHE_SIZE> {
109 if matches!(algorithm, Algorithm::LessMemory) {
110 if output_statistics.is_some() {
111 EitherCache::Left(pack::cache::Never)
113 } else {
114 EitherCache::Right(pack::cache::lru::StaticLinkedList::<CACHE_SIZE>::default())
115 }
116 } else {
117 EitherCache::Left(pack::cache::Never)
118 }
119 };
120 let res = match ext {
121 "pack" => {
122 let pack = odb::pack::data::File::at(path, object_hash).with_context(|| "Could not open pack file")?;
123 pack.verify_checksum(&mut progress.add_child("Sha1 of pack"), should_interrupt)
124 .map(|id| (id, None))?
125 }
126 "idx" => {
127 let idx =
128 odb::pack::index::File::at(path, object_hash).with_context(|| "Could not open pack index file")?;
129 let packfile_path = path.with_extension("pack");
130 let pack = odb::pack::data::File::at(&packfile_path, object_hash)
131 .map_err(|e| {
132 writeln!(
133 err,
134 "Could not find matching pack file at '{}' - only index file will be verified, error was: {}",
135 packfile_path.display(),
136 e
137 )
138 .ok();
139 e
140 })
141 .ok();
142
143 idx.verify_integrity(
144 pack.as_ref().map(|p| gix::odb::pack::index::verify::PackContext {
145 data: p,
146 options: gix::odb::pack::index::verify::integrity::Options {
147 verify_mode: mode,
148 traversal: algorithm.into(),
149 make_pack_lookup_cache: cache,
150 thread_limit,
151 },
152 }),
153 &mut progress,
154 should_interrupt,
155 )
156 .map(|o| (o.actual_index_checksum, o.pack_traverse_statistics))
157 .with_context(|| "Verification failure")?
158 }
159 "" => match path.file_name() {
160 Some(file_name) if file_name == "multi-pack-index" => {
161 let multi_index = gix::odb::pack::multi_index::File::at(path, None)?;
162 let res = multi_index.verify_integrity(
163 &mut progress,
164 should_interrupt,
165 gix::odb::pack::index::verify::integrity::Options {
166 verify_mode: mode,
167 traversal: algorithm.into(),
168 thread_limit,
169 make_pack_lookup_cache: cache,
170 },
171 )?;
172 match output_statistics {
173 Some(OutputFormat::Human) => {
174 for (index_name, stats) in multi_index.index_names().iter().zip(res.pack_traverse_statistics) {
175 writeln!(out, "{}", index_name.display()).ok();
176 drop(print_statistics(&mut out, &stats));
177 }
178 }
179 #[cfg(feature = "serde")]
180 Some(OutputFormat::Json) => serde_json::to_writer_pretty(
181 out,
182 &multi_index
183 .index_names()
184 .iter()
185 .zip(res.pack_traverse_statistics)
186 .collect::<Vec<_>>(),
187 )?,
188 _ => {}
189 }
190 return Ok(());
191 }
192 _ => {
193 return Err(anyhow!(
194 "Cannot determine data type on path without extension '{}', expecting default extensions 'idx' and 'pack'",
195 path.display()
196 ));
197 }
198 },
199 ext => return Err(anyhow!("Unknown extension {ext:?}, expecting 'idx' or 'pack'")),
200 };
201 if let Some(stats) = res.1.as_ref() {
202 #[cfg_attr(not(feature = "serde"), allow(clippy::single_match))]
203 match output_statistics {
204 Some(OutputFormat::Human) => drop(print_statistics(&mut out, stats)),
205 #[cfg(feature = "serde")]
206 Some(OutputFormat::Json) => serde_json::to_writer_pretty(out, stats)?,
207 _ => {}
208 }
209 }
210 Ok(())
211}
212
213fn print_statistics(out: &mut impl io::Write, stats: &index::traverse::Statistics) -> io::Result<()> {
214 writeln!(out, "objects per delta chain length")?;
215 let mut chain_length_to_object: Vec<_> = stats.objects_per_chain_length.iter().map(|(a, b)| (*a, *b)).collect();
216 chain_length_to_object.sort_by_key(|e| e.0);
217 let mut total_object_count = 0;
218 for (chain_length, object_count) in chain_length_to_object.into_iter() {
219 total_object_count += object_count;
220 writeln!(out, "\t{chain_length:>2}: {object_count}")?;
221 }
222 writeln!(out, "\t->: {total_object_count}")?;
223
224 let pack::data::decode::entry::Outcome {
225 kind: _,
226 num_deltas,
227 decompressed_size,
228 compressed_size,
229 object_size,
230 } = stats.average;
231
232 let width = 30;
233 writeln!(out, "\naverages")?;
234 #[rustfmt::skip]
235 writeln!(
236 out,
237 "\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};",
238 "delta chain length:", num_deltas,
239 "decompressed entry [B]:", decompressed_size,
240 "compressed entry [B]:", compressed_size,
241 "decompressed object size [B]:", object_size,
242 width = width
243 )?;
244
245 writeln!(out, "\ncompression")?;
246 #[rustfmt::skip]
247 writeln!(
248 out, "\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
249 "compressed entries size", ByteSize(stats.total_compressed_entries_size).display().si(),
250 "decompressed entries size", ByteSize(stats.total_decompressed_entries_size).display().si(),
251 "total object size", ByteSize(stats.total_object_size).display().si(),
252 "pack size", ByteSize(stats.pack_size).display().si(),
253 width = width
254 )?;
255 #[rustfmt::skip]
256 writeln!(
257 out,
258 "\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
259 "num trees", stats.num_trees,
260 "num blobs", stats.num_blobs,
261 "num commits", stats.num_commits,
262 "num tags", stats.num_tags,
263 width = width
264 )?;
265 let compression_ratio = stats.total_decompressed_entries_size as f64 / stats.total_compressed_entries_size as f64;
266 let delta_compression_ratio = stats.total_object_size as f64 / stats.total_compressed_entries_size as f64;
267 #[rustfmt::skip]
268 writeln!(
269 out,
270 "\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.3}%",
271 "compression ratio", compression_ratio,
272 "delta compression ratio", delta_compression_ratio,
273 "delta gain", delta_compression_ratio / compression_ratio,
274 "pack overhead", (1.0 - (stats.total_compressed_entries_size as f64 / stats.pack_size as f64)) * 100.0,
275 width = width
276 )?;
277 Ok(())
278}