1use std::collections::{BTreeSet, HashSet, VecDeque};
13use std::hash::BuildHasher;
14
15use crate::hash::Hash;
16use crate::object::Object;
17use crate::store::{ObjectStore, StoreError};
18
19pub const MAX_ANCESTORS: usize = 10_000;
21
22pub fn collect_ancestor_set<S: BuildHasher>(
39 store: &ObjectStore,
40 start: Hash,
41 set: &mut HashSet<Hash, S>,
42) -> Result<(), StoreError> {
43 let mut stack: Vec<Hash> = Vec::new();
44 stack.push(start);
45
46 let mut count: usize = 0;
47 while let Some(current) = stack.pop() {
48 if count >= MAX_ANCESTORS {
49 break;
50 }
51 if !set.insert(current) {
52 continue;
53 }
54 count += 1;
55
56 match store.read_object(¤t) {
57 Ok(Object::Commit(c)) => {
58 for &parent in &c.parents {
59 stack.push(parent);
60 }
61 }
62 Ok(_) | Err(StoreError::ObjectNotFound(_)) => {}
65 Err(e) => return Err(e),
66 }
67 }
68 Ok(())
69}
70
71pub const MAX_REACHABLE: usize = 10_000_000;
77
78pub fn reachable_objects(store: &ObjectStore, root: &Hash) -> Result<BTreeSet<Hash>, StoreError> {
102 reachable_closure(store, std::iter::once(root))
103}
104
105pub fn reachable_closure<'a, I>(store: &ObjectStore, roots: I) -> Result<BTreeSet<Hash>, StoreError>
124where
125 I: IntoIterator<Item = &'a Hash>,
126{
127 reachable_closure_checked(store, roots).map(|(out, _truncated)| out)
131}
132
133pub fn reachable_closure_checked<'a, I>(
143 store: &ObjectStore,
144 roots: I,
145) -> Result<(BTreeSet<Hash>, bool), StoreError>
146where
147 I: IntoIterator<Item = &'a Hash>,
148{
149 let mut out: BTreeSet<Hash> = BTreeSet::new();
150 let mut queue: VecDeque<Hash> = VecDeque::new();
151 for root in roots {
152 queue.push_back(*root);
153 }
154
155 let mut truncated = false;
156 while let Some(h) = queue.pop_front() {
157 if out.len() >= MAX_REACHABLE {
158 truncated = true;
161 break;
162 }
163 if !out.insert(h) {
164 continue;
165 }
166 let obj = store.read_object(&h)?;
169 match obj {
170 Object::Commit(c) => {
171 queue.push_back(c.tree_hash);
172 for p in c.parents {
173 queue.push_back(p);
174 }
175 }
176 Object::Remix(r) => {
177 queue.push_back(r.tree_hash);
178 for p in r.parents {
179 queue.push_back(p);
180 }
181 }
184 Object::Tree(t) => {
185 for e in t.entries {
186 queue.push_back(e.object_hash);
187 }
188 }
189 Object::ChunkedBlob(cb) => {
190 for c in cb.chunks {
191 queue.push_back(c);
192 }
193 }
194 Object::Tag(t) => {
195 queue.push_back(t.target);
199 }
200 Object::Blob(_) | Object::Delta(_) => {
201 }
203 }
204 }
205 Ok((out, truncated))
206}
207
208#[cfg(test)]
213#[allow(clippy::many_single_char_names)] mod tests {
215 use super::*;
216 use crate::hash;
217 use crate::object::EntryMode;
218 use crate::object::{Blob, Commit, Identity, Object, Tree, TreeEntry};
219 use crate::serialize;
220 use tempfile::TempDir;
221
222 fn store() -> (TempDir, ObjectStore) {
223 let d = TempDir::new().unwrap();
224 let s = ObjectStore::init(d.path()).unwrap();
225 (d, s)
226 }
227
228 fn put_blob(s: &ObjectStore, data: &[u8]) -> Hash {
229 let bytes = serialize::serialize(&Object::Blob(Blob {
230 data: data.to_vec(),
231 }))
232 .unwrap();
233 s.write(&bytes).unwrap()
234 }
235
236 fn put_tree(s: &ObjectStore, entries: Vec<TreeEntry>) -> Hash {
237 let bytes = serialize::serialize(&Object::Tree(Tree { entries })).unwrap();
238 s.write(&bytes).unwrap()
239 }
240
241 fn make_single_file_tree(s: &ObjectStore, name: &[u8], data: &[u8]) -> Hash {
242 let blob = put_blob(s, data);
243 put_tree(
244 s,
245 vec![TreeEntry {
246 name: name.to_vec(),
247 mode: EntryMode::Blob,
248 object_hash: blob,
249 }],
250 )
251 }
252
253 fn make_commit(s: &ObjectStore, tree: Hash, parents: &[Hash], message: &str) -> Hash {
254 let c = Commit {
255 tree_hash: tree,
256 parents: parents.to_vec(),
257 author: Identity::ed25519([0; 32]),
258 signer: [0; 32],
259 message: message.as_bytes().to_vec(),
260 timestamp: message.len() as u64, message_hash: [0; 32],
262 content_digest: [0; 32],
263 signature: [0; 64],
264 };
265 let bytes = serialize::serialize(&Object::Commit(c)).unwrap();
266 s.write(&bytes).unwrap()
267 }
268
269 #[test]
270 fn linear_chain_3_commits() {
271 let (_d, s) = store();
272 let tree = make_single_file_tree(&s, b"f", b"data");
273 let c1 = make_commit(&s, tree, &[], "c1");
274 let c2 = make_commit(&s, tree, &[c1], "c2");
275 let c3 = make_commit(&s, tree, &[c2], "c3");
276
277 let mut set = HashSet::new();
278 collect_ancestor_set(&s, c3, &mut set).unwrap();
279 assert_eq!(set.len(), 3);
280 assert!(set.contains(&c1));
281 assert!(set.contains(&c2));
282 assert!(set.contains(&c3));
283 }
284
285 #[test]
286 fn diamond_dag() {
287 let (_d, s) = store();
288 let tree = make_single_file_tree(&s, b"f.txt", b"data");
289 let c1 = make_commit(&s, tree, &[], "c1");
290 let c2 = make_commit(&s, tree, &[c1], "c2");
291 let c3 = make_commit(&s, tree, &[c1], "c3");
292 let c4 = make_commit(&s, tree, &[c2, c3], "c4");
293
294 let mut set = HashSet::new();
295 collect_ancestor_set(&s, c4, &mut set).unwrap();
296 assert_eq!(set.len(), 4);
297 assert!(set.contains(&c1));
298 assert!(set.contains(&c2));
299 assert!(set.contains(&c3));
300 assert!(set.contains(&c4));
301 }
302
303 #[test]
304 fn root_commit_alone() {
305 let (_d, s) = store();
306 let tree = make_single_file_tree(&s, b"f.txt", b"data");
307 let c1 = make_commit(&s, tree, &[], "root");
308
309 let mut set = HashSet::new();
310 collect_ancestor_set(&s, c1, &mut set).unwrap();
311 assert_eq!(set.len(), 1);
312 assert!(set.contains(&c1));
313 }
314
315 #[test]
316 fn handles_non_existent_parent_gracefully() {
317 let (_d, s) = store();
318 let tree = make_single_file_tree(&s, b"f.txt", b"data");
319 let fake_parent = hash::hash(b"nonexistent-parent");
320 let c1 = make_commit(&s, tree, &[fake_parent], "orphan");
321
322 let mut set = HashSet::new();
323 collect_ancestor_set(&s, c1, &mut set).unwrap();
324 assert_eq!(set.len(), 2);
327 assert!(set.contains(&c1));
328 assert!(set.contains(&fake_parent));
329 }
330
331 #[test]
332 fn empty_store_records_starting_hash() {
333 let (_d, s) = store();
334 let fake = hash::hash(b"does-not-exist");
335 let mut set = HashSet::new();
336 collect_ancestor_set(&s, fake, &mut set).unwrap();
337 assert_eq!(set.len(), 1);
338 assert!(set.contains(&fake));
339 }
340
341 #[test]
346 fn reachable_single_commit_includes_tree_and_blob() {
347 let (_d, s) = store();
348 let blob = put_blob(&s, b"hi");
349 let tree = put_tree(
350 &s,
351 vec![TreeEntry {
352 name: b"f".to_vec(),
353 mode: EntryMode::Blob,
354 object_hash: blob,
355 }],
356 );
357 let c1 = make_commit(&s, tree, &[], "c1");
358 let reach = reachable_objects(&s, &c1).unwrap();
359 assert!(reach.contains(&c1));
360 assert!(reach.contains(&tree));
361 assert!(reach.contains(&blob));
362 assert_eq!(reach.len(), 3);
363 }
364
365 #[test]
366 fn reachable_walks_parents_and_dedups() {
367 let (_d, s) = store();
368 let t = make_single_file_tree(&s, b"f", b"x");
369 let c1 = make_commit(&s, t, &[], "c1");
370 let c2 = make_commit(&s, t, &[c1], "c2");
371 let reach = reachable_objects(&s, &c2).unwrap();
372 assert_eq!(reach.len(), 4);
374 assert!(reach.contains(&c1));
375 assert!(reach.contains(&c2));
376 assert!(reach.contains(&t));
377 }
378
379 #[test]
380 fn reachable_walks_nested_trees() {
381 let (_d, s) = store();
382 let leaf = put_blob(&s, b"leaf");
383 let inner = put_tree(
384 &s,
385 vec![TreeEntry {
386 name: b"leaf".to_vec(),
387 mode: EntryMode::Blob,
388 object_hash: leaf,
389 }],
390 );
391 let outer = put_tree(
392 &s,
393 vec![TreeEntry {
394 name: b"sub".to_vec(),
395 mode: EntryMode::Tree,
396 object_hash: inner,
397 }],
398 );
399 let c1 = make_commit(&s, outer, &[], "c");
400 let reach = reachable_objects(&s, &c1).unwrap();
401 assert!(reach.contains(&outer));
402 assert!(reach.contains(&inner));
403 assert!(reach.contains(&leaf));
404 }
405
406 #[test]
407 fn reachable_walks_chunked_blob_chunks() {
408 use crate::object::ChunkedBlob;
409 let (_d, s) = store();
410 let c0 = put_blob(&s, b"A");
411 let c1 = put_blob(&s, b"B");
412 let cb_bytes = serialize::serialize(&Object::ChunkedBlob(ChunkedBlob {
413 total_size: 2,
414 chunk_size: 1,
415 chunks: vec![c0, c1],
416 }))
417 .unwrap();
418 let cb = s.write(&cb_bytes).unwrap();
419 let tree = put_tree(
420 &s,
421 vec![TreeEntry {
422 name: b"big".to_vec(),
423 mode: EntryMode::Blob,
424 object_hash: cb,
425 }],
426 );
427 let commit = make_commit(&s, tree, &[], "c");
428 let reach = reachable_objects(&s, &commit).unwrap();
429 assert!(reach.contains(&cb));
430 assert!(reach.contains(&c0));
431 assert!(reach.contains(&c1));
432 }
433
434 #[test]
435 fn reachable_missing_root_errors() {
436 let (_d, s) = store();
437 let fake = hash::hash(b"nope");
438 let err = reachable_objects(&s, &fake).unwrap_err();
439 assert!(matches!(err, StoreError::ObjectNotFound(_)));
440 }
441
442 #[test]
443 fn reachable_is_deterministic() {
444 let (_d, s) = store();
445 let t = make_single_file_tree(&s, b"f", b"x");
446 let c1 = make_commit(&s, t, &[], "c1");
447 let c2 = make_commit(&s, t, &[c1], "c2");
448 let a: Vec<Hash> = reachable_objects(&s, &c2).unwrap().into_iter().collect();
449 let b: Vec<Hash> = reachable_objects(&s, &c2).unwrap().into_iter().collect();
450 assert_eq!(a, b);
451 }
452}