use std::{ffi::OsStr, io, path::Path, str::FromStr, sync::atomic::AtomicBool};

use anyhow::{anyhow, Context as AnyhowContext, Result};
use bytesize::ByteSize;
use gix::{
    object, odb,
    odb::{pack, pack::index},
    NestedProgress,
};
pub use index::verify::Mode;

use crate::OutputFormat;

/// The range of progress levels used by this operation, for use when setting up progress rendering.
pub const PROGRESS_RANGE: std::ops::RangeInclusive<u8> = 1..=2;

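/// The algorithm to use for pack verification, trading off speed against memory usage.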
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
pub enum Algorithm {
    LessTime,
    LessMemory,
}

impl Algorithm {
    /// The variant names accepted on the command-line.
    pub fn variants() -> &'static [&'static str] {
        &["less-time", "less-memory"]
    }
}

impl FromStr for Algorithm {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let s_lc = s.to_ascii_lowercase();
        Ok(match s_lc.as_str() {
            "less-memory" => Algorithm::LessMemory,
            "less-time" => Algorithm::LessTime,
            _ => return Err(format!("Invalid verification algorithm: '{s}'")),
        })
    }
}

impl From<Algorithm> for index::traverse::Algorithm {
    fn from(v: Algorithm) -> Self {
        match v {
            Algorithm::LessMemory => index::traverse::Algorithm::Lookup,
            Algorithm::LessTime => index::traverse::Algorithm::DeltaTreeLookup,
        }
    }
}

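/// The context for [`pack_or_pack_index()`].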
pub struct Context<'a, W1: io::Write, W2: io::Write> {
    /// If set, output pack-traversal statistics in the given format after verification.
    pub output_statistics: Option<OutputFormat>,
    /// A stream to which to write operation results.
    pub out: W1,
    /// A stream to which to write errors.
    pub err: W2,
    /// The amount of threads to use. If `None`, a sensible default is chosen.
    pub thread_limit: Option<usize>,
    /// The thoroughness of the verification.
    pub mode: index::verify::Mode,
    /// The algorithm to use, trading off speed against memory usage.
    pub algorithm: Algorithm,
    /// A flag to observe in order to abort the operation when it is set.
    pub should_interrupt: &'a AtomicBool,
    /// The kind of hash to expect for objects within the pack.
    pub object_hash: gix::hash::Kind,
}

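/// A pack decode-entry cache chosen at runtime: either no cache at all, or a fixed-size LRU list.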
enum EitherCache<const SIZE: usize> {
    Left(pack::cache::Never),
    Right(pack::cache::lru::StaticLinkedList<SIZE>),
}

impl<const SIZE: usize> pack::cache::DecodeEntry for EitherCache<SIZE> {
    fn put(&mut self, pack_id: u32, offset: u64, data: &[u8], kind: object::Kind, compressed_size: usize) {
        match self {
            EitherCache::Left(v) => v.put(pack_id, offset, data, kind, compressed_size),
            EitherCache::Right(v) => v.put(pack_id, offset, data, kind, compressed_size),
        }
    }

    fn get(&mut self, pack_id: u32, offset: u64, out: &mut Vec<u8>) -> Option<(object::Kind, usize)> {
        match self {
            EitherCache::Left(v) => v.get(pack_id, offset, out),
            EitherCache::Right(v) => v.get(pack_id, offset, out),
        }
    }
}

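/// Verify the integrity of the file at `path`, which may be a pack file (`*.pack`), a pack index
/// (`*.idx`), or a `multi-pack-index`, writing results to `out` and non-fatal errors to `err` of
/// the given [`Context`].
///
/// A minimal usage sketch (not compiled as a doc-test); the path is a placeholder and any
/// [`Mode`] variant may be used:
///
/// ```ignore
/// use std::sync::atomic::AtomicBool;
///
/// let should_interrupt = AtomicBool::new(false);
/// pack_or_pack_index(
///     "pack-1234.idx",
///     gix::progress::Discard,
///     Context {
///         output_statistics: None,
///         out: std::io::stdout(),
///         err: std::io::stderr(),
///         thread_limit: None,
///         mode: Mode::HashCrc32,
///         algorithm: Algorithm::LessMemory,
///         should_interrupt: &should_interrupt,
///         object_hash: gix::hash::Kind::Sha1,
///     },
/// )?;
/// ```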
pub fn pack_or_pack_index<W1, W2>(
    path: impl AsRef<Path>,
    mut progress: impl NestedProgress + 'static,
    Context {
        mut out,
        mut err,
        mode,
        output_statistics,
        thread_limit,
        algorithm,
        should_interrupt,
        object_hash,
    }: Context<'_, W1, W2>,
) -> Result<()>
where
    W1: io::Write,
    W2: io::Write,
{
    let path = path.as_ref();
    let ext = path.extension().and_then(OsStr::to_str).unwrap_or("");
    const CACHE_SIZE: usize = 64;
    let cache = || -> EitherCache<CACHE_SIZE> {
        if matches!(algorithm, Algorithm::LessMemory) {
            if output_statistics.is_some() {
                // turn off acceleration as we need to see entire chains all the time
                EitherCache::Left(pack::cache::Never)
            } else {
                EitherCache::Right(pack::cache::lru::StaticLinkedList::<CACHE_SIZE>::default())
            }
        } else {
            EitherCache::Left(pack::cache::Never)
        }
    };
    let res = match ext {
        "pack" => {
            let pack = odb::pack::data::File::at(path, object_hash).with_context(|| "Could not open pack file")?;
            pack.verify_checksum(&mut progress.add_child("Sha1 of pack"), should_interrupt)
                .map(|id| (id, None))?
        }
        "idx" => {
            let idx =
                odb::pack::index::File::at(path, object_hash).with_context(|| "Could not open pack index file")?;
            let packfile_path = path.with_extension("pack");
            let pack = odb::pack::data::File::at(&packfile_path, object_hash)
                .map_err(|e| {
                    writeln!(
                        err,
                        "Could not find matching pack file at '{}' - only index file will be verified, error was: {}",
                        packfile_path.display(),
                        e
                    )
                    .ok();
                    e
                })
                .ok();

            idx.verify_integrity(
                pack.as_ref().map(|p| gix::odb::pack::index::verify::PackContext {
                    data: p,
                    options: gix::odb::pack::index::verify::integrity::Options {
                        verify_mode: mode,
                        traversal: algorithm.into(),
                        make_pack_lookup_cache: cache,
                        thread_limit,
                    },
                }),
                &mut progress,
                should_interrupt,
            )
            .map(|o| (o.actual_index_checksum, o.pack_traverse_statistics))
            .with_context(|| "Verification failure")?
        }
        "" => match path.file_name() {
            Some(file_name) if file_name == "multi-pack-index" => {
                let multi_index = gix::odb::pack::multi_index::File::at(path)?;
                let res = multi_index.verify_integrity(
                    &mut progress,
                    should_interrupt,
                    gix::odb::pack::index::verify::integrity::Options {
                        verify_mode: mode,
                        traversal: algorithm.into(),
                        thread_limit,
                        make_pack_lookup_cache: cache,
                    },
                )?;
                match output_statistics {
                    Some(OutputFormat::Human) => {
                        for (index_name, stats) in multi_index.index_names().iter().zip(res.pack_traverse_statistics) {
                            writeln!(out, "{}", index_name.display()).ok();
                            drop(print_statistics(&mut out, &stats));
                        }
                    }
                    #[cfg(feature = "serde")]
                    Some(OutputFormat::Json) => serde_json::to_writer_pretty(
                        out,
                        &multi_index
                            .index_names()
                            .iter()
                            .zip(res.pack_traverse_statistics)
                            .collect::<Vec<_>>(),
                    )?,
                    _ => {}
                }
                return Ok(());
            }
            _ => {
                return Err(anyhow!(
                    "Cannot determine data type on path without extension '{}', expecting default extensions 'idx' and 'pack'",
                    path.display()
                ))
            }
        },
        ext => return Err(anyhow!("Unknown extension {ext:?}, expecting 'idx' or 'pack'")),
    };
    if let Some(stats) = res.1.as_ref() {
        #[cfg_attr(not(feature = "serde"), allow(clippy::single_match))]
        match output_statistics {
            Some(OutputFormat::Human) => drop(print_statistics(&mut out, stats)),
            #[cfg(feature = "serde")]
            Some(OutputFormat::Json) => serde_json::to_writer_pretty(out, stats)?,
            _ => {}
        }
    }
    Ok(())
}

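/// Write a human-readable summary of the pack-traversal `stats` to `out`.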
fn print_statistics(out: &mut impl io::Write, stats: &index::traverse::Statistics) -> io::Result<()> {
    writeln!(out, "objects per delta chain length")?;
    let mut chain_length_to_object: Vec<_> = stats.objects_per_chain_length.iter().map(|(a, b)| (*a, *b)).collect();
    chain_length_to_object.sort_by_key(|e| e.0);
    let mut total_object_count = 0;
    for (chain_length, object_count) in chain_length_to_object.into_iter() {
        total_object_count += object_count;
        writeln!(out, "\t{chain_length:>2}: {object_count}")?;
    }
    writeln!(out, "\t->: {total_object_count}")?;

    let pack::data::decode::entry::Outcome {
        kind: _,
        num_deltas,
        decompressed_size,
        compressed_size,
        object_size,
    } = stats.average;

    let width = 30;
    writeln!(out, "\naverages")?;
    #[rustfmt::skip]
    writeln!(
        out,
        "\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};\n\t{:<width$} {};",
        "delta chain length:", num_deltas,
        "decompressed entry [B]:", decompressed_size,
        "compressed entry [B]:", compressed_size,
        "decompressed object size [B]:", object_size,
        width = width
    )?;

    writeln!(out, "\ncompression")?;
    #[rustfmt::skip]
    writeln!(
        out, "\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
        "compressed entries size", ByteSize(stats.total_compressed_entries_size).display().si(),
        "decompressed entries size", ByteSize(stats.total_decompressed_entries_size).display().si(),
        "total object size", ByteSize(stats.total_object_size).display().si(),
        "pack size", ByteSize(stats.pack_size).display().si(),
        width = width
    )?;
    #[rustfmt::skip]
    writeln!(
        out,
        "\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}\n\t{:<width$}: {}",
        "num trees", stats.num_trees,
        "num blobs", stats.num_blobs,
        "num commits", stats.num_commits,
        "num tags", stats.num_tags,
        width = width
    )?;
    let compression_ratio = stats.total_decompressed_entries_size as f64 / stats.total_compressed_entries_size as f64;
    let delta_compression_ratio = stats.total_object_size as f64 / stats.total_compressed_entries_size as f64;
    #[rustfmt::skip]
    writeln!(
        out,
        "\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.2}\n\t{:<width$}: {:.3}%",
        "compression ratio", compression_ratio,
        "delta compression ratio", delta_compression_ratio,
        "delta gain", delta_compression_ratio / compression_ratio,
        "pack overhead", (1.0 - (stats.total_compressed_entries_size as f64 / stats.pack_size as f64)) * 100.0,
        width = width
    )?;
    Ok(())
}