1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
use crate::{pack, pack::index::access::PackOffset, pack::tree::Tree};
use git_features::{
interrupt::is_triggered,
progress::{self, Progress},
};
use std::{
fs, io,
io::{BufRead, Read},
time::Instant,
};
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
#[error("{message}")]
Io { source: io::Error, message: &'static str },
#[error(transparent)]
Header(#[from] pack::data::header::decode::Error),
#[error("Could find object with id {id} in this pack. Thin packs are not supported")]
UnresolvedRefDelta { id: git_hash::ObjectId },
#[error(transparent)]
Tree(#[from] pack::tree::Error),
#[error("Interrupted")]
Interrupted,
}
/// Size in bytes of the fixed header at the start of a pack data file,
/// which is read and decoded before any entry is parsed.
const PACK_HEADER_LEN: usize = 12;
impl<T> Tree<T> {
    /// Build a `Tree` by reading entry headers from the pack data file at `pack_path`.
    ///
    /// * `data_sorted_by_offsets` yields one item per pack entry, in ascending pack-offset order.
    /// * `get_pack_offset` extracts each item's offset within the pack.
    /// * `pack_path` is the path to the pack data file whose entry headers are parsed.
    /// * `progress` is initialized with the object count (if known), incremented per entry,
    ///   and used to report throughput at the end.
    /// * `resolve_in_pack_id` maps a base object id to its offset within this pack;
    ///   `None` results in [`Error::UnresolvedRefDelta`], as thin packs are not supported.
    ///
    /// # Errors
    ///
    /// Fails on I/O errors, an undecodable pack header, unresolved ref-deltas,
    /// tree construction errors, or when interrupted via [`is_triggered()`].
    pub fn from_offsets_in_pack(
        data_sorted_by_offsets: impl Iterator<Item = T>,
        get_pack_offset: impl Fn(&T) -> PackOffset,
        pack_path: impl AsRef<std::path::Path>,
        mut progress: impl Progress,
        resolve_in_pack_id: impl Fn(&git_hash::oid) -> Option<PackOffset>,
    ) -> Result<Self, Error> {
        let mut r = io::BufReader::with_capacity(
            // A generously sized buffer, as we mostly skip forward over compressed payloads.
            8192 * 8,
            fs::File::open(pack_path).map_err(|err| Error::Io {
                source: err,
                message: "open pack path",
            })?,
        );
        // Use the iterator's upper size bound, if present, both to pre-size the tree
        // and to initialize the progress display.
        let anticipated_num_objects = if let Some(num_objects) = data_sorted_by_offsets.size_hint().1 {
            progress.init(Some(num_objects), progress::count("objects"));
            num_objects
        } else {
            0
        };
        let mut tree = Tree::with_capacity(anticipated_num_objects)?;
        {
            // Read and validate the fixed-size pack header before parsing entries.
            let mut buf = [0u8; PACK_HEADER_LEN];
            r.read_exact(&mut buf).map_err(|err| Error::Io {
                source: err,
                message: "reading header buffer with at least 12 bytes failed - pack file truncated?",
            })?;
            pack::data::header::decode(&buf)?;
        }
        let then = Instant::now();
        let mut previous_cursor_position = None::<u64>;
        for (idx, data) in data_sorted_by_offsets.enumerate() {
            let pack_offset = get_pack_offset(&data);
            if let Some(previous_offset) = previous_cursor_position {
                // Skip the previous entry's compressed payload, which we never parse here.
                Self::advance_cursor_to_pack_offset(&mut r, pack_offset, previous_offset)?;
            };
            let entry = pack::data::Entry::from_read(&mut r, pack_offset).map_err(|err| Error::Io {
                source: err,
                message: "EOF while parsing header",
            })?;
            // Only the entry header was consumed; the cursor now sits at the start of
            // the entry's compressed data, `header_size()` bytes past `pack_offset`.
            previous_cursor_position = Some(pack_offset + entry.header_size() as u64);
            use pack::data::entry::Header::*;
            match entry.header {
                // Base (non-delta) objects become roots of the tree.
                Tree | Blob | Commit | Tag => {
                    tree.add_root(pack_offset, data)?;
                }
                // Deltas referencing a base by id must resolve within this very pack.
                RefDelta { base_id } => {
                    resolve_in_pack_id(base_id.as_ref())
                        .ok_or(Error::UnresolvedRefDelta { id: base_id })
                        .and_then(|base_pack_offset| {
                            tree.add_child(base_pack_offset, pack_offset, data).map_err(Into::into)
                        })?;
                }
                // Deltas referencing a base by backward distance from this entry's offset.
                OfsDelta { base_distance } => {
                    let base_pack_offset = pack_offset
                        .checked_sub(base_distance)
                        .expect("in bound distance for deltas");
                    tree.add_child(base_pack_offset, pack_offset, data)?;
                }
            };
            progress.inc();
            // Poll the interrupt flag only periodically to keep the hot loop cheap.
            if idx % 10_000 == 0 && is_triggered() {
                return Err(Error::Interrupted);
            }
        }
        progress.show_throughput(then);
        Ok(tree)
    }

    /// Advance the buffered reader from `previous_offset` to `pack_offset` without
    /// copying, by consuming bytes directly out of the reader's internal buffer.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if the underlying file ends before `pack_offset` is reached.
    fn advance_cursor_to_pack_offset(
        r: &mut io::BufReader<fs::File>,
        pack_offset: u64,
        previous_offset: u64,
    ) -> Result<(), Error> {
        let mut bytes_to_skip = pack_offset
            .checked_sub(previous_offset)
            .expect("continuously ascending pack offsets") as usize;
        while bytes_to_skip != 0 {
            // fill_buf() exposes the internal buffer; consume() marks bytes as read,
            // letting us skip forward without copying into a scratch buffer.
            let buf = r.fill_buf().map_err(|err| Error::Io {
                source: err,
                message: "skip bytes",
            })?;
            if buf.is_empty() {
                // Premature EOF: the offsets promise more data than the file contains.
                return Err(Error::Io {
                    source: io::Error::new(
                        io::ErrorKind::UnexpectedEof,
                        "ran out of bytes before reading desired amount of bytes",
                    ),
                    message: "index file is damaged or corrupt",
                });
            }
            let bytes = buf.len().min(bytes_to_skip);
            r.consume(bytes);
            bytes_to_skip -= bytes;
        }
        Ok(())
    }
}