use filetime::FileTime;

use crate::{entry, extension, Entry, State, Version};

mod entries;
pub mod header;

mod error {

    use crate::{decode, extension};

    /// The error returned by `State::from_bytes()`.
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        #[error(transparent)]
        Header(#[from] decode::header::Error),
        #[error("Could not parse entry at index {index}")]
        Entry { index: u32 },
        #[error("Mandatory extension wasn't implemented, or was malformed")]
        Extension(#[from] extension::decode::Error),
        #[error("Index trailer should have been {expected} bytes long, but was {actual}")]
        UnexpectedTrailerLength { expected: usize, actual: usize },
        #[error("Shared index checksum was {actual_checksum} but should have been {expected_checksum}")]
        ChecksumMismatch {
            actual_checksum: git_hash::ObjectId,
            expected_checksum: git_hash::ObjectId,
        },
    }
}
pub use error::Error;
use git_features::parallel::InOrderIter;

use crate::util::read_u32;

/// Options to define how to decode an index state.
#[derive(Default, Clone, Copy)]
pub struct Options {
    /// If `None`, use as many threads as there are logical cores, otherwise use no more than the given amount.
    pub thread_limit: Option<usize>,
    /// The minimum size of extension data in bytes before it is decoded on its own thread,
    /// in parallel to entry decoding.
    pub min_extension_block_in_bytes_for_threading: usize,
    /// If set, decoding fails with [`Error::ChecksumMismatch`] unless the checksum in the
    /// index trailer matches this value.
    pub expected_checksum: Option<git_hash::ObjectId>,
}

impl State {
    /// Decode an index state from `data`, using hashes of kind `object_hash` and recording
    /// `timestamp` as the time the state was created, and return it along with the checksum
    /// found in the index trailer.
    pub fn from_bytes(
        data: &[u8],
        timestamp: FileTime,
        object_hash: git_hash::Kind,
        Options {
            thread_limit,
            min_extension_block_in_bytes_for_threading,
            expected_checksum,
        }: Options,
    ) -> Result<(Self, git_hash::ObjectId), Error> {
        let (version, num_entries, post_header_data) = header::decode(data, object_hash)?;
        let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash);

        let mut num_threads = git_features::parallel::num_threads(thread_limit);
        let path_backing_buffer_size = entries::estimate_path_storage_requirements_in_bytes(
            num_entries,
            data.len(),
            start_of_extensions,
            object_hash,
            version,
        );

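        // With an end-of-index-entry extension and more than one thread available, entries and
        // extensions can be decoded in parallel; otherwise everything is decoded sequentially below.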
        let (entries, ext, data) = match start_of_extensions {
            Some(offset) if num_threads > 1 => {
                let extensions_data = &data[offset..];
                let index_offsets_table = extension::index_entry_offset_table::find(extensions_data, object_hash);
                let (entries_res, ext_res) = git_features::parallel::threads(|scope| {
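                    // Note that the block passed to `then()` is evaluated eagerly, so one thread is
                    // deducted from `num_threads` here whether or not the extension block ends up
                    // being large enough to be decoded on its own thread.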
                    let extension_loading =
                        (extensions_data.len() > min_extension_block_in_bytes_for_threading).then({
                            num_threads -= 1;
                            || {
                                scope
                                    .builder()
                                    .name("git-index.from_bytes.load-extensions".into())
                                    .spawn(|_| extension::decode::all(extensions_data, object_hash))
                                    .expect("valid name")
                            }
                        });
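                    // If an Index Entry Offset Table (IEOT) extension is present, it provides chunk
                    // boundaries that allow entries to be decoded on multiple threads.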
                    let entries_res = match index_offsets_table {
                        Some(entry_offsets) => {
                            let chunk_size = (entry_offsets.len() as f32 / num_threads as f32).ceil() as usize;
                            let num_chunks = entry_offsets.chunks(chunk_size).count();
                            let mut threads = Vec::with_capacity(num_chunks);
                            for (id, chunks) in entry_offsets.chunks(chunk_size).enumerate() {
                                let chunks = chunks.to_vec();
                                threads.push(
                                    scope
                                        .builder()
                                        .name(format!("git-index.from_bytes.read-entries.{id}"))
                                        .spawn(move |_| {
                                            let num_entries_for_chunks =
                                                chunks.iter().map(|c| c.num_entries).sum::<u32>() as usize;
                                            let mut entries = Vec::with_capacity(num_entries_for_chunks);
                                            let path_backing_buffer_size_for_chunks =
                                                entries::estimate_path_storage_requirements_in_bytes(
                                                    num_entries_for_chunks as u32,
                                                    data.len() / num_chunks,
                                                    start_of_extensions.map(|ofs| ofs / num_chunks),
                                                    object_hash,
                                                    version,
                                                );
                                            let mut path_backing =
                                                Vec::with_capacity(path_backing_buffer_size_for_chunks);
                                            let mut is_sparse = false;
                                            for offset in chunks {
                                                let (
                                                    entries::Outcome {
                                                        is_sparse: chunk_is_sparse,
                                                    },
                                                    _data,
                                                ) = entries::chunk(
                                                    &data[offset.from_beginning_of_file as usize..],
                                                    &mut entries,
                                                    &mut path_backing,
                                                    offset.num_entries,
                                                    object_hash,
                                                    version,
                                                )?;
                                                is_sparse |= chunk_is_sparse;
                                            }
                                            Ok::<_, Error>((
                                                id,
                                                EntriesOutcome {
                                                    entries,
                                                    path_backing,
                                                    is_sparse,
                                                },
                                            ))
                                        })
                                        .expect("valid name"),
                                );
                            }
                            let mut results =
                                InOrderIter::from(threads.into_iter().map(|thread| thread.join().unwrap()));
                            let mut acc = results.next().expect("have at least one result, one per thread");
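                            // Merge the remaining chunk results in order, rebasing each chunk's path
                            // offsets onto the end of the accumulated path backing.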
                            while let (Ok(lhs), Some(res)) = (acc.as_mut(), results.next()) {
                                match res {
                                    Ok(rhs) => {
                                        lhs.is_sparse |= rhs.is_sparse;
                                        let ofs = lhs.path_backing.len();
                                        lhs.path_backing.extend(rhs.path_backing);
                                        lhs.entries.extend(rhs.entries.into_iter().map(|mut e| {
                                            e.path.start += ofs;
                                            e.path.end += ofs;
                                            e
                                        }));
                                    }
                                    Err(err) => {
                                        acc = Err(err);
                                    }
                                }
                            }
                            acc.map(|acc| (acc, &data[data.len() - object_hash.len_in_bytes()..]))
                        }
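                        // Without an offset table, all entries are decoded sequentially on this thread.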
                        None => entries(
                            post_header_data,
                            path_backing_buffer_size,
                            num_entries,
                            object_hash,
                            version,
                        ),
                    };
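                    // Join the extension thread if one was spawned, otherwise decode extensions here.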
                    let ext_res = extension_loading
                        .map(|thread| thread.join().unwrap())
                        .unwrap_or_else(|| extension::decode::all(extensions_data, object_hash));
                    (entries_res, ext_res)
                })
                .unwrap();
                let (ext, data) = ext_res?;
                (entries_res?.0, ext, data)
            }
            None | Some(_) => {
                let (entries, data) = entries(
                    post_header_data,
                    path_backing_buffer_size,
                    num_entries,
                    object_hash,
                    version,
                )?;
                let (ext, data) = extension::decode::all(data, object_hash)?;
                (entries, ext, data)
            }
        };

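        // Whatever remains after entries and extensions must be exactly the trailing checksum.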
        if data.len() != object_hash.len_in_bytes() {
            return Err(Error::UnexpectedTrailerLength {
                expected: object_hash.len_in_bytes(),
                actual: data.len(),
            });
        }

        let checksum = git_hash::ObjectId::from(data);
        if let Some(expected_checksum) = expected_checksum {
            if checksum != expected_checksum {
                return Err(Error::ChecksumMismatch {
                    actual_checksum: checksum,
                    expected_checksum,
                });
            }
        }
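        // An index is sparse if its entries or any of its extensions indicate sparseness.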
        let EntriesOutcome {
            entries,
            path_backing,
            mut is_sparse,
        } = entries;
        let extension::decode::Outcome {
            tree,
            link,
            resolve_undo,
            untracked,
            fs_monitor,
            is_sparse: is_sparse_from_ext,
        } = ext;
        is_sparse |= is_sparse_from_ext;

        Ok((
            State {
                object_hash,
                timestamp,
                version,
                entries,
                path_backing,
                is_sparse,

                tree,
                link,
                resolve_undo,
                untracked,
                fs_monitor,
            },
            checksum,
        ))
    }
}

struct EntriesOutcome {
    pub entries: Vec<Entry>,
    pub path_backing: Vec<u8>,
    pub is_sparse: bool,
}

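/// Decode `num_entries` index entries from `post_header_data` in one go on the current thread,
/// reserving path storage up-front based on the given estimate.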
fn entries(
    post_header_data: &[u8],
    path_backing_buffer_size: usize,
    num_entries: u32,
    object_hash: git_hash::Kind,
    version: Version,
) -> Result<(EntriesOutcome, &[u8]), Error> {
    let mut entries = Vec::with_capacity(num_entries as usize);
    let mut path_backing = Vec::with_capacity(path_backing_buffer_size);
    entries::chunk(
        post_header_data,
        &mut entries,
        &mut path_backing,
        num_entries,
        object_hash,
        version,
    )
    .map(|(entries::Outcome { is_sparse }, data): (entries::Outcome, &[u8])| {
        (
            EntriesOutcome {
                entries,
                path_backing,
                is_sparse,
            },
            data,
        )
    })
}

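/// Decode stat data in the order it appears on disk: ctime and mtime (seconds and nanoseconds
/// each), followed by dev, ino, uid, gid and size, all read as u32 values.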
pub(crate) fn stat(data: &[u8]) -> Option<(entry::Stat, &[u8])> {
    let (ctime_secs, data) = read_u32(data)?;
    let (ctime_nsecs, data) = read_u32(data)?;
    let (mtime_secs, data) = read_u32(data)?;
    let (mtime_nsecs, data) = read_u32(data)?;
    let (dev, data) = read_u32(data)?;
    let (ino, data) = read_u32(data)?;
    let (uid, data) = read_u32(data)?;
    let (gid, data) = read_u32(data)?;
    let (size, data) = read_u32(data)?;
    Some((
        entry::Stat {
            ctime: entry::Time {
                secs: ctime_secs,
                nsecs: ctime_nsecs,
            },
            mtime: entry::Time {
                secs: mtime_secs,
                nsecs: mtime_nsecs,
            },
            dev,
            ino,
            uid,
            gid,
            size,
        },
        data,
    ))
}