// git_pack/multi_index/chunk.rs
chunk.rs1pub mod index_names {
3 use std::path::{Path, PathBuf};
4
5 use git_object::bstr::{BString, ByteSlice};
6
7 pub const ID: git_chunk::Id = *b"PNAM";
9
10 pub mod decode {
12 use git_object::bstr::BString;
13
14 #[derive(Debug, thiserror::Error)]
16 #[allow(missing_docs)]
17 pub enum Error {
18 #[error("The pack names were not ordered alphabetically.")]
19 NotOrderedAlphabetically,
20 #[error("Each pack path name must be terminated with a null byte")]
21 MissingNullByte,
22 #[error("Couldn't turn path '{path}' into OS path due to encoding issues")]
23 PathEncoding { path: BString },
24 #[error("non-padding bytes found after all paths were read.")]
25 UnknownTrailerBytes,
26 }
27 }
28
29 pub fn from_bytes(mut chunk: &[u8], num_packs: u32) -> Result<Vec<PathBuf>, decode::Error> {
32 let mut out = Vec::new();
33 for _ in 0..num_packs {
34 let null_byte_pos = chunk.find_byte(b'\0').ok_or(decode::Error::MissingNullByte)?;
35
36 let path = &chunk[..null_byte_pos];
37 let path = git_path::try_from_byte_slice(path)
38 .map_err(|_| decode::Error::PathEncoding {
39 path: BString::from(path),
40 })?
41 .to_owned();
42
43 if let Some(previous) = out.last() {
44 if previous >= &path {
45 return Err(decode::Error::NotOrderedAlphabetically);
46 }
47 }
48 out.push(path);
49
50 chunk = &chunk[null_byte_pos + 1..];
51 }
52
53 if !chunk.is_empty() && !chunk.iter().all(|b| *b == 0) {
54 return Err(decode::Error::UnknownTrailerBytes);
55 }
56 Ok(out)
59 }
60
61 pub fn storage_size(paths: impl IntoIterator<Item = impl AsRef<Path>>) -> u64 {
64 let mut count = 0u64;
65 for path in paths {
66 let path = path.as_ref();
67 let ascii_path = path.to_str().expect("UTF-8 compatible paths");
68 assert!(
69 ascii_path.is_ascii(),
70 "must use ascii bytes for correct size computation"
71 );
72 count += (ascii_path.as_bytes().len() + 1) as u64
73 }
74
75 let needed_alignment = CHUNK_ALIGNMENT - (count % CHUNK_ALIGNMENT);
76 if needed_alignment < CHUNK_ALIGNMENT {
77 count += needed_alignment;
78 }
79 count
80 }
81
82 pub fn write(
84 paths: impl IntoIterator<Item = impl AsRef<Path>>,
85 mut out: impl std::io::Write,
86 ) -> std::io::Result<()> {
87 let mut written_bytes = 0;
88 for path in paths {
89 let path = path.as_ref().to_str().expect("UTF-8 path");
90 out.write_all(path.as_bytes())?;
91 out.write_all(&[0])?;
92 written_bytes += path.as_bytes().len() as u64 + 1;
93 }
94
95 let needed_alignment = CHUNK_ALIGNMENT - (written_bytes % CHUNK_ALIGNMENT);
96 if needed_alignment < CHUNK_ALIGNMENT {
97 let padding = [0u8; CHUNK_ALIGNMENT as usize];
98 out.write_all(&padding[..needed_alignment as usize])?;
99 }
100 Ok(())
101 }
102
103 const CHUNK_ALIGNMENT: u64 = 4;
104}
106pub mod fanout {
108 use std::convert::TryInto;
109
110 use crate::multi_index;
111
112 pub const SIZE: usize = 4 * 256;
114
115 pub const ID: git_chunk::Id = *b"OIDF";
117
118 pub fn from_bytes(chunk: &[u8]) -> Option<[u32; 256]> {
120 if chunk.len() != SIZE {
121 return None;
122 }
123 let mut out = [0; 256];
124 for (c, f) in chunk.chunks(4).zip(out.iter_mut()) {
125 *f = u32::from_be_bytes(c.try_into().unwrap());
126 }
127 out.into()
128 }
129
130 pub(crate) fn write(
132 sorted_entries: &[multi_index::write::Entry],
133 mut out: impl std::io::Write,
134 ) -> std::io::Result<()> {
135 let fanout = crate::index::write::encode::fanout(sorted_entries.iter().map(|e| e.id.first_byte()));
136
137 for value in fanout.iter() {
138 out.write_all(&value.to_be_bytes())?;
139 }
140 Ok(())
141 }
142}
144pub mod lookup {
146 use std::ops::Range;
147
148 use crate::multi_index;
149
150 pub const ID: git_chunk::Id = *b"OIDL";
152
153 pub fn storage_size(entries: usize, object_hash: git_hash::Kind) -> u64 {
155 (entries * object_hash.len_in_bytes()) as u64
156 }
157
158 pub(crate) fn write(
159 sorted_entries: &[multi_index::write::Entry],
160 mut out: impl std::io::Write,
161 ) -> std::io::Result<()> {
162 for entry in sorted_entries {
163 out.write_all(entry.id.as_slice())?;
164 }
165 Ok(())
166 }
167
168 pub fn is_valid(offset: &Range<usize>, hash: git_hash::Kind, num_objects: u32) -> bool {
170 (offset.end - offset.start) / hash.len_in_bytes() == num_objects as usize
171 }
172}
174pub mod offsets {
176 use std::{convert::TryInto, ops::Range};
177
178 use crate::multi_index;
179
180 pub const ID: git_chunk::Id = *b"OOFF";
182
183 pub fn storage_size(entries: usize) -> u64 {
185 (entries * (4 + 4)) as u64
186 }
187
188 pub(crate) fn write(
189 sorted_entries: &[multi_index::write::Entry],
190 large_offsets_needed: bool,
191 mut out: impl std::io::Write,
192 ) -> std::io::Result<()> {
193 use crate::index::write::encode::{HIGH_BIT, LARGE_OFFSET_THRESHOLD};
194 let mut num_large_offsets = 0u32;
195
196 for entry in sorted_entries {
197 out.write_all(&entry.pack_index.to_be_bytes())?;
198
199 let offset: u32 = if large_offsets_needed {
200 if entry.pack_offset > LARGE_OFFSET_THRESHOLD {
201 let res = num_large_offsets | HIGH_BIT;
202 num_large_offsets += 1;
203 res
204 } else {
205 entry.pack_offset as u32
206 }
207 } else {
208 entry
209 .pack_offset
210 .try_into()
211 .expect("without large offsets, pack-offset fits u32")
212 };
213 out.write_all(&offset.to_be_bytes())?;
214 }
215 Ok(())
216 }
217
218 pub fn is_valid(offset: &Range<usize>, num_objects: u32) -> bool {
220 let entry_size = 4 + 4 ;
221 ((offset.end - offset.start) / num_objects as usize) == entry_size
222 }
223}
225pub mod large_offsets {
227 use std::ops::Range;
228
229 use crate::{index::write::encode::LARGE_OFFSET_THRESHOLD, multi_index};
230
231 pub const ID: git_chunk::Id = *b"LOFF";
233
234 pub(crate) fn num_large_offsets(entries: &[multi_index::write::Entry]) -> Option<usize> {
236 let mut num_large_offsets = 0;
237 let mut needs_large_offsets = false;
238 for entry in entries {
239 if entry.pack_offset > LARGE_OFFSET_THRESHOLD {
240 num_large_offsets += 1;
241 }
242 if entry.pack_offset > u32::MAX as crate::data::Offset {
243 needs_large_offsets = true;
244 }
245 }
246
247 needs_large_offsets.then_some(num_large_offsets)
248 }
249 pub fn is_valid(offset: &Range<usize>) -> bool {
251 (offset.end - offset.start) % 8 == 0
252 }
253
254 pub(crate) fn write(
255 sorted_entries: &[multi_index::write::Entry],
256 mut num_large_offsets: usize,
257 mut out: impl std::io::Write,
258 ) -> std::io::Result<()> {
259 for offset in sorted_entries
260 .iter()
261 .filter_map(|e| (e.pack_offset > LARGE_OFFSET_THRESHOLD).then_some(e.pack_offset))
262 {
263 out.write_all(&offset.to_be_bytes())?;
264 num_large_offsets = num_large_offsets
265 .checked_sub(1)
266 .expect("BUG: wrote more offsets the previously found");
267 }
268 assert_eq!(num_large_offsets, 0, "BUG: wrote less offsets than initially counted");
269 Ok(())
270 }
271
272 pub(crate) fn storage_size(large_offsets: usize) -> u64 {
274 8 * large_offsets as u64
275 }
276}