git_internal/internal/pack/
pack_index.rs1use tokio::sync::mpsc;
5
6pub use crate::internal::pack::index_entry::IndexEntry;
7use crate::{errors::GitError, hash::ObjectHash, utils::HashAlgorithm};
8
9pub struct IdxBuilder {
16 sender: Option<mpsc::Sender<Vec<u8>>>,
17 inner_hash: HashAlgorithm, object_number: usize,
19 pack_hash: ObjectHash,
20}
21
22impl IdxBuilder {
23 pub fn new(object_number: usize, sender: mpsc::Sender<Vec<u8>>, pack_hash: ObjectHash) -> Self {
25 Self {
26 sender: Some(sender),
27 inner_hash: HashAlgorithm::new(),
28 object_number,
29 pack_hash,
30 }
31 }
32
33 pub fn drop_sender(&mut self) {
35 self.sender.take(); }
37
38 async fn send_data(&mut self, data: Vec<u8>) -> Result<(), GitError> {
40 if let Some(sender) = &self.sender {
41 self.inner_hash.update(&data);
42 sender.send(data).await.map_err(|e| {
43 GitError::IOError(std::io::Error::new(
44 std::io::ErrorKind::BrokenPipe,
45 format!("Failed to send idx data: {e}"),
46 ))
47 })?;
48 }
49 Ok(())
50 }
51
52 async fn send_data_without_update_hash(&mut self, data: Vec<u8>) -> Result<(), GitError> {
54 if let Some(sender) = &self.sender {
55 sender.send(data).await.map_err(|e| {
56 GitError::IOError(std::io::Error::new(
57 std::io::ErrorKind::BrokenPipe,
58 format!("Failed to send idx data: {e}"),
59 ))
60 })?;
61 }
62 Ok(())
63 }
64
65 async fn send_u32(&mut self, v: u32) -> Result<(), GitError> {
67 self.send_data(v.to_be_bytes().to_vec()).await
68 }
69
70 async fn send_u64(&mut self, v: u64) -> Result<(), GitError> {
72 self.send_data(v.to_be_bytes().to_vec()).await
73 }
74
75 async fn write_header(&mut self) -> Result<(), GitError> {
78 let header: [u8; 8] = [0xFF, 0x74, 0x4F, 0x63, 0, 0, 0, 2];
81 self.send_data(header.to_vec()).await
82 }
83
84 async fn write_fanout(&mut self, entries: &mut [IndexEntry]) -> Result<(), GitError> {
86 entries.sort_by(|a, b| a.hash.cmp(&b.hash));
87 let mut fanout = [0u32; 256];
88 for entry in entries.iter() {
89 fanout[entry.hash.to_data()[0] as usize] += 1;
90 }
91
92 for i in 1..fanout.len() {
94 fanout[i] += fanout[i - 1];
95 }
96
97 for &count in fanout.iter() {
99 self.send_u32(count).await?;
100 }
101
102 Ok(())
103 }
104
105 async fn write_names(&mut self, entries: &Vec<IndexEntry>) -> Result<(), GitError> {
107 for e in entries {
108 self.send_data(e.hash.to_data().clone()).await?;
109 }
110
111 Ok(())
112 }
113
114 async fn write_crc32(&mut self, entries: &Vec<IndexEntry>) -> Result<(), GitError> {
116 for e in entries {
117 self.send_u32(e.crc32).await?;
118 }
119
120 Ok(())
121 }
122
123 async fn write_offsets(&mut self, entries: &Vec<IndexEntry>) -> Result<(), GitError> {
125 let mut large = vec![];
126 for e in entries {
127 if e.offset <= 0x7FFF_FFFF {
128 self.send_u32(e.offset as u32).await?;
130 } else {
131 let marker = 0x8000_0000 | large.len() as u32;
133 self.send_u32(marker).await?;
134 large.push(e.offset);
135 }
136 }
137 for v in large {
138 self.send_u64(v).await?;
139 }
140 Ok(())
141 }
142
143 async fn write_trailer(&mut self) -> Result<(), GitError> {
145 self.send_data_without_update_hash(self.pack_hash.to_data().clone())
147 .await?;
148
149 let idx_hash = self.inner_hash.clone().finalize();
150 self.send_data(idx_hash).await?;
152 Ok(())
153 }
154
155 pub async fn write_idx(&mut self, mut entries: Vec<IndexEntry>) -> Result<(), GitError> {
157 if entries.len() != self.object_number {
159 return Err(GitError::ConversionError(format!(
160 "entries length {} != object_number {}",
161 entries.len(),
162 self.object_number
163 )));
164 }
165
166 self.write_header().await?;
168 self.write_fanout(&mut entries).await?;
169 self.write_names(&entries).await?;
170 self.write_crc32(&entries).await?;
171 self.write_offsets(&entries).await?;
172 self.write_trailer().await?;
173 self.drop_sender();
174 Ok(())
175 }
176}
177
#[cfg(test)]
mod tests {
    use tokio::sync::mpsc;

    use crate::{
        errors::GitError,
        hash::ObjectHash,
        internal::pack::{index_entry::IndexEntry, pack_index::IdxBuilder},
    };

    /// Byte length of a SHA-1 object name.
    const SHA1_LEN: usize = 20;

    /// Builds a SHA-1 hash whose 20 bytes are all `n`.
    fn fake_sha1(n: u8) -> ObjectHash {
        ObjectHash::Sha1([n; 20])
    }

    /// Builds `n` entries with hash bytes `i`, CRC `0x12345678 + i` and
    /// offset `0x10 + 3 * i`.
    fn build_entries_sha1(n: usize) -> Vec<IndexEntry> {
        (0..n)
            .map(|i| IndexEntry {
                hash: fake_sha1(i as u8),
                crc32: 0x12345678 + i as u32,
                offset: 0x10 + (i as u64) * 3,
            })
            .collect()
    }

    /// Decodes a byte slice as consecutive big-endian `u32` values.
    fn be_u32_table(bytes: &[u8]) -> Vec<u32> {
        bytes
            .chunks_exact(4)
            .map(|w| u32::from_be_bytes([w[0], w[1], w[2], w[3]]))
            .collect()
    }

    /// Writes a three-object SHA-1 index and checks every section of the output.
    #[tokio::test]
    async fn test_idx_builder_sha1_basic() -> Result<(), GitError> {
        // The receiver is drained only after `write_idx` returns, so the
        // channel must be able to buffer every chunk the builder emits.
        let (tx, mut rx) = mpsc::channel::<Vec<u8>>(4096);

        let object_number = 3;
        let pack_hash = fake_sha1(0xAA);
        let mut builder = IdxBuilder::new(object_number, tx, pack_hash);

        builder.write_idx(build_entries_sha1(object_number)).await?;

        // `write_idx` drops the sender on success, which ends this loop.
        let mut out: Vec<u8> = Vec::new();
        while let Some(chunk) = rx.recv().await {
            out.extend_from_slice(&chunk);
        }

        // Section boundaries.
        let fanout_start = 8;
        let names_start = fanout_start + 256 * 4;
        let crc_start = names_start + object_number * SHA1_LEN;
        let offset_start = crc_start + object_number * 4;
        let pack_hash_start = offset_start + object_number * 4;
        let idx_hash_start = pack_hash_start + SHA1_LEN;
        let total_len = idx_hash_start + SHA1_LEN;

        // The stream must contain exactly the expected sections: nothing
        // missing and nothing trailing after the idx checksum.
        assert_eq!(out.len(), total_len, "unexpected idx length");

        // Header: magic + version 2.
        assert_eq!(&out[..fanout_start], &[0xFF, 0x74, 0x4F, 0x63, 0, 0, 0, 2]);

        // Fan-out: first bytes are 0, 1 and 2, so the cumulative counts are
        // 1, 2, 3 and then 3 for every remaining slot.
        let fanout = be_u32_table(&out[fanout_start..names_start]);
        assert_eq!(fanout.len(), 256);
        for (i, &count) in fanout.iter().enumerate() {
            let expected = (i as u32 + 1).min(object_number as u32);
            assert_eq!(count, expected, "fanout mismatch at index {i}");
        }

        // Object names, sorted by hash.
        for (i, name) in out[names_start..crc_start]
            .chunks_exact(SHA1_LEN)
            .enumerate()
        {
            assert!(name.iter().all(|b| *b == i as u8));
        }

        // CRC32 table.
        let crcs = be_u32_table(&out[crc_start..offset_start]);
        assert_eq!(crcs.len(), object_number);
        for (i, &actual) in crcs.iter().enumerate() {
            let expected = 0x12345678 + i as u32;
            assert_eq!(expected, actual);
        }

        // 4-byte offset table (all offsets are small, so no 8-byte table).
        let offsets = be_u32_table(&out[offset_start..pack_hash_start]);
        assert_eq!(offsets.len(), object_number);
        for (i, &actual) in offsets.iter().enumerate() {
            let expected = 0x10 + (i as u64) * 3;
            assert_eq!(expected as u32, actual);
        }

        // Trailer: copy of the pack checksum, then the idx checksum.
        let pack_hash_bytes = &out[pack_hash_start..idx_hash_start];
        assert!(pack_hash_bytes.iter().all(|b| *b == 0xAA));

        Ok(())
    }
}