1use std::collections::{BTreeSet, HashSet};
30use std::fs;
31use std::io;
32use std::path::{Path, PathBuf};
33
34use crate::hash::{self, HEX_LEN, Hash, ZERO};
35use crate::object::Object;
36use crate::store::ObjectStore;
37
38pub const BISECT_FILE: &str = "bisect";
40
41const MAX_BISECT_FILE_BYTES: u64 = 1024 * 1024;
42
43const MAX_ANCESTORS: usize = 10_000;
45
46#[derive(Debug, thiserror::Error)]
48pub enum BisectError {
49 #[error("no bisect in progress")]
50 NoBisectInProgress,
51 #[error("bisect state on disk is malformed")]
52 InvalidBisectState,
53 #[error(transparent)]
54 Io(#[from] io::Error),
55 #[error(transparent)]
56 Object(#[from] crate::object::MkitError),
57 #[error(transparent)]
58 Store(#[from] crate::store::StoreError),
59}
60
61pub type BisectResult<T> = Result<T, BisectError>;
63
64#[derive(Debug, Clone, PartialEq, Eq)]
69pub struct BisectState {
70 pub orig_head: Hash,
72 pub orig_branch: Option<String>,
74 pub bad_hash: Option<Hash>,
76 pub good_hashes: Vec<Hash>,
78 pub skipped: BTreeSet<Hash>,
82}
83
84#[derive(Debug, Clone, PartialEq, Eq)]
86pub enum BisectStep {
87 Testing { hash: Hash, remaining: usize },
90 Found(Hash),
92 NeedMore,
94}
95
96#[must_use]
98pub fn is_bisect_in_progress(mkit_dir: &Path) -> bool {
99 mkit_dir.join(BISECT_FILE).is_file()
100}
101
102pub fn read_state(mkit_dir: &Path) -> BisectResult<BisectState> {
108 let path = mkit_dir.join(BISECT_FILE);
109 let meta = match fs::metadata(&path) {
110 Ok(m) => m,
111 Err(e) if e.kind() == io::ErrorKind::NotFound => {
112 return Err(BisectError::NoBisectInProgress);
113 }
114 Err(e) => return Err(BisectError::Io(e)),
115 };
116 if meta.len() == 0 {
117 return Err(BisectError::InvalidBisectState);
118 }
119 if meta.len() > MAX_BISECT_FILE_BYTES {
120 return Err(BisectError::InvalidBisectState);
121 }
122 let raw = fs::read_to_string(&path)?;
123
124 let mut lines = raw.split('\n');
125
126 let orig_head_line = lines.next().ok_or(BisectError::InvalidBisectState)?;
127 let orig_head_trim = orig_head_line.trim_end_matches(['\r', ' ']);
128 if orig_head_trim.is_empty() {
129 return Err(BisectError::InvalidBisectState);
130 }
131 let orig_head = hash::from_hex(orig_head_trim).map_err(|_| BisectError::InvalidBisectState)?;
132
133 let branch_line = lines.next().ok_or(BisectError::InvalidBisectState)?;
134 let branch_trim = branch_line.trim_end_matches(['\r', ' ']);
135 let orig_branch = if branch_trim.is_empty() {
136 None
137 } else {
138 Some(branch_trim.to_string())
139 };
140
141 let bad_line = lines.next().ok_or(BisectError::InvalidBisectState)?;
142 let bad_trim = bad_line.trim_end_matches(['\r', ' ']);
143 let bad_hash = if bad_trim.is_empty() {
144 None
145 } else {
146 Some(hash::from_hex(bad_trim).map_err(|_| BisectError::InvalidBisectState)?)
147 };
148
149 let mut good_hashes = Vec::new();
150 let mut skipped = BTreeSet::new();
151 for line in lines {
152 let line = line.trim_end_matches(['\r', ' ']);
153 if line.is_empty() {
154 continue;
155 }
156 if let Some(rest) = line.strip_prefix("skip:") {
157 if rest.len() != HEX_LEN {
160 return Err(BisectError::InvalidBisectState);
161 }
162 let h = hash::from_hex(rest).map_err(|_| BisectError::InvalidBisectState)?;
163 skipped.insert(h);
164 } else {
165 if line.len() != HEX_LEN {
166 return Err(BisectError::InvalidBisectState);
167 }
168 let h = hash::from_hex(line).map_err(|_| BisectError::InvalidBisectState)?;
169 good_hashes.push(h);
170 }
171 }
172
173 Ok(BisectState {
174 orig_head,
175 orig_branch,
176 bad_hash,
177 good_hashes,
178 skipped,
179 })
180}
181
182pub fn write_state(mkit_dir: &Path, state: &BisectState) -> BisectResult<()> {
187 fs::create_dir_all(mkit_dir)?;
188 let mut buf = String::new();
189 buf.push_str(&hash::to_hex(&state.orig_head));
190 buf.push('\n');
191 if let Some(b) = &state.orig_branch {
192 buf.push_str(b);
193 }
194 buf.push('\n');
195 if let Some(bad) = &state.bad_hash {
196 buf.push_str(&hash::to_hex(bad));
197 }
198 buf.push('\n');
199 for g in &state.good_hashes {
200 buf.push_str(&hash::to_hex(g));
201 buf.push('\n');
202 }
203 for s in &state.skipped {
204 buf.push_str("skip:");
205 buf.push_str(&hash::to_hex(s));
206 buf.push('\n');
207 }
208 fs::write(mkit_dir.join(BISECT_FILE), buf.as_bytes())?;
209 Ok(())
210}
211
212pub fn cleanup_bisect(mkit_dir: &Path) -> BisectResult<()> {
217 let path = mkit_dir.join(BISECT_FILE);
218 match fs::remove_file(&path) {
219 Ok(()) => Ok(()),
220 Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(()),
221 Err(e) => Err(BisectError::Io(e)),
222 }
223}
224
225#[must_use]
227pub fn bisect_file_path(mkit_dir: &Path) -> PathBuf {
228 mkit_dir.join(BISECT_FILE)
229}
230
231pub fn enumerate_range(
237 store: &ObjectStore,
238 bad: Hash,
239 good_hashes: &[Hash],
240) -> BisectResult<Vec<Hash>> {
241 let mut good_set: HashSet<Hash> = HashSet::new();
242 for g in good_hashes {
243 collect_ancestor_set(store, *g, &mut good_set);
244 }
245
246 let mut visited: HashSet<Hash> = HashSet::new();
247 let mut candidates: Vec<Hash> = Vec::new();
248 let mut queue: Vec<Hash> = vec![bad];
249 let mut head = 0usize;
250 while head < queue.len() {
251 let current = queue[head];
252 head += 1;
253 if !visited.insert(current) {
254 continue;
255 }
256 if good_set.contains(¤t) {
257 continue;
258 }
259 candidates.push(current);
260 let Ok(obj) = store.read_object(¤t) else {
261 continue;
262 };
263 if let Object::Commit(commit) = obj {
264 for p in commit.parents {
265 queue.push(p);
266 }
267 }
268 }
269 Ok(candidates)
270}
271
272#[must_use]
275pub fn pick_midpoint(candidates: &[Hash]) -> Hash {
276 if candidates.is_empty() {
277 return ZERO;
278 }
279 candidates[candidates.len() / 2]
280}
281
282#[must_use]
291pub fn pick_midpoint_skip(candidates: &[Hash], skipped: &BTreeSet<Hash>) -> Hash {
292 if candidates.is_empty() {
293 return ZERO;
294 }
295 let mid = candidates.len() / 2;
296 for delta in 0..=candidates.len() {
298 if let Some(idx) = mid.checked_add(delta).filter(|&i| i < candidates.len()) {
300 let h = candidates[idx];
301 if !skipped.contains(&h) {
302 return h;
303 }
304 }
305 if let (true, Some(idx)) = (delta > 0, mid.checked_sub(delta)) {
307 let h = candidates[idx];
308 if !skipped.contains(&h) {
309 return h;
310 }
311 }
312 }
313 ZERO
315}
316
317pub fn next_step(store: &ObjectStore, state: &BisectState) -> BisectResult<BisectStep> {
331 let Some(bad) = state.bad_hash else {
332 return Ok(BisectStep::NeedMore);
333 };
334 if state.good_hashes.is_empty() {
335 return Ok(BisectStep::NeedMore);
336 }
337 let candidates = enumerate_range(store, bad, &state.good_hashes)?;
338 if candidates.is_empty() {
339 return Ok(BisectStep::Found(bad));
340 }
341 let non_skipped: Vec<Hash> = candidates
344 .iter()
345 .copied()
346 .filter(|h| !state.skipped.contains(h))
347 .collect();
348 if non_skipped.len() == 1 {
349 return Ok(BisectStep::Found(non_skipped[0]));
350 }
351 if non_skipped.is_empty() {
352 return Ok(BisectStep::Found(bad));
354 }
355 let mid = pick_midpoint_skip(&candidates, &state.skipped);
356 Ok(BisectStep::Testing {
357 hash: mid,
358 remaining: candidates.len(),
359 })
360}
361
362fn collect_ancestor_set(store: &ObjectStore, start: Hash, set: &mut HashSet<Hash>) {
365 let mut stack: Vec<Hash> = vec![start];
366 let mut count = 0usize;
367 while let Some(current) = stack.pop() {
368 if count >= MAX_ANCESTORS {
369 break;
370 }
371 if !set.insert(current) {
372 continue;
373 }
374 count += 1;
375 let Ok(obj) = store.read_object(¤t) else {
376 continue;
377 };
378 if let Object::Commit(commit) = obj {
379 for p in commit.parents {
380 stack.push(p);
381 }
382 }
383 }
384}
385
386#[cfg(test)]
387mod tests {
388 use super::*;
389 use crate::object::{Commit, Identity, Tree, TreeEntry};
390 use crate::serialize;
391 use tempfile::TempDir;
392
393 fn fresh_store() -> (TempDir, ObjectStore) {
394 let dir = TempDir::new().unwrap();
395 let store = ObjectStore::init(dir.path()).unwrap();
396 (dir, store)
397 }
398
399 fn put_blob(store: &ObjectStore, data: &[u8]) -> Hash {
400 let bytes = serialize::serialize(&Object::Blob(crate::object::Blob {
401 data: data.to_vec(),
402 }))
403 .unwrap();
404 store.write(&bytes).unwrap()
405 }
406
407 fn put_tree(store: &ObjectStore, name: &str, blob_h: Hash) -> Hash {
408 let tree = Object::Tree(Tree {
409 entries: vec![TreeEntry {
410 name: name.as_bytes().to_vec(),
411 mode: crate::object::EntryMode::Blob,
412 object_hash: blob_h,
413 }],
414 });
415 store.write(&serialize::serialize(&tree).unwrap()).unwrap()
416 }
417
418 fn put_commit(store: &ObjectStore, tree_h: Hash, parents: Vec<Hash>, ts: u64) -> Hash {
419 let commit = Object::Commit(Commit::new_unannotated(
420 tree_h,
421 parents,
422 Identity::ed25519([0u8; 32]),
423 [0u8; 32],
424 b"msg".to_vec(),
425 ts,
426 [0u8; 64],
427 ));
428 store
429 .write(&serialize::serialize(&commit).unwrap())
430 .unwrap()
431 }
432
433 #[test]
434 fn state_roundtrip_with_branch_and_bad() {
435 let tmp = TempDir::new().unwrap();
436 let mkit = tmp.path().join(".mkit");
437 fs::create_dir_all(&mkit).unwrap();
438 let state = BisectState {
439 orig_head: hash::hash(b"orig-head"),
440 orig_branch: Some("main".to_string()),
441 bad_hash: Some(hash::hash(b"bad")),
442 good_hashes: vec![hash::hash(b"g1"), hash::hash(b"g2")],
443 skipped: BTreeSet::new(),
444 };
445 write_state(&mkit, &state).unwrap();
446 let read = read_state(&mkit).unwrap();
447 assert_eq!(read, state);
448 }
449
450 #[test]
451 fn state_roundtrip_with_no_branch_no_bad() {
452 let tmp = TempDir::new().unwrap();
453 let mkit = tmp.path().join(".mkit");
454 fs::create_dir_all(&mkit).unwrap();
455 let state = BisectState {
456 orig_head: hash::hash(b"head"),
457 orig_branch: None,
458 bad_hash: None,
459 good_hashes: Vec::new(),
460 skipped: BTreeSet::new(),
461 };
462 write_state(&mkit, &state).unwrap();
463 let read = read_state(&mkit).unwrap();
464 assert_eq!(read, state);
465 }
466
467 #[test]
468 fn state_roundtrip_with_skipped_hashes() {
469 let tmp = TempDir::new().unwrap();
470 let mkit = tmp.path().join(".mkit");
471 fs::create_dir_all(&mkit).unwrap();
472 let s1 = hash::hash(b"skip1");
473 let s2 = hash::hash(b"skip2");
474 let mut skipped = BTreeSet::new();
475 skipped.insert(s1);
476 skipped.insert(s2);
477 let state = BisectState {
478 orig_head: hash::hash(b"head"),
479 orig_branch: Some("main".to_string()),
480 bad_hash: Some(hash::hash(b"bad")),
481 good_hashes: vec![hash::hash(b"good")],
482 skipped,
483 };
484 write_state(&mkit, &state).unwrap();
485 let read = read_state(&mkit).unwrap();
486 assert_eq!(read, state);
487 assert_eq!(read.skipped.len(), 2);
488 assert!(read.skipped.contains(&s1));
489 assert!(read.skipped.contains(&s2));
490 }
491
492 #[test]
493 fn old_state_file_without_skip_deserializes_with_empty_skipped() {
494 let tmp = TempDir::new().unwrap();
496 let mkit = tmp.path().join(".mkit");
497 fs::create_dir_all(&mkit).unwrap();
498 let orig_head = hash::hash(b"orig");
499 let bad = hash::hash(b"bad");
500 let good = hash::hash(b"good");
501 let content = format!(
503 "{}\n\n{}\n{}\n",
504 hash::to_hex(&orig_head),
505 hash::to_hex(&bad),
506 hash::to_hex(&good)
507 );
508 fs::write(mkit.join(BISECT_FILE), content.as_bytes()).unwrap();
509 let read = read_state(&mkit).unwrap();
510 assert_eq!(read.orig_head, orig_head);
511 assert_eq!(read.bad_hash, Some(bad));
512 assert_eq!(read.good_hashes, vec![good]);
513 assert!(
514 read.skipped.is_empty(),
515 "old files must deserialize with empty skipped"
516 );
517 }
518
519 #[test]
520 fn read_state_when_no_bisect_returns_error() {
521 let tmp = TempDir::new().unwrap();
522 let mkit = tmp.path().join(".mkit");
523 fs::create_dir_all(&mkit).unwrap();
524 let err = read_state(&mkit).unwrap_err();
525 assert!(matches!(err, BisectError::NoBisectInProgress));
526 }
527
528 #[test]
529 fn read_state_rejects_empty_file() {
530 let tmp = TempDir::new().unwrap();
531 let mkit = tmp.path().join(".mkit");
532 fs::create_dir_all(&mkit).unwrap();
533 fs::write(mkit.join(BISECT_FILE), b"").unwrap();
534 let err = read_state(&mkit).unwrap_err();
535 assert!(matches!(err, BisectError::InvalidBisectState));
536 }
537
538 #[test]
539 fn is_bisect_in_progress_detection() {
540 let tmp = TempDir::new().unwrap();
541 let mkit = tmp.path().join(".mkit");
542 fs::create_dir_all(&mkit).unwrap();
543 assert!(!is_bisect_in_progress(&mkit));
544 fs::write(mkit.join(BISECT_FILE), b"placeholder").unwrap();
545 assert!(is_bisect_in_progress(&mkit));
546 }
547
548 #[test]
549 fn cleanup_removes_state_file() {
550 let tmp = TempDir::new().unwrap();
551 let mkit = tmp.path().join(".mkit");
552 fs::create_dir_all(&mkit).unwrap();
553 fs::write(mkit.join(BISECT_FILE), b"x\n").unwrap();
554 cleanup_bisect(&mkit).unwrap();
555 assert!(!is_bisect_in_progress(&mkit));
556 }
557
558 #[test]
559 fn cleanup_on_missing_file_is_noop() {
560 let tmp = TempDir::new().unwrap();
561 let mkit = tmp.path().join(".mkit");
562 fs::create_dir_all(&mkit).unwrap();
563 cleanup_bisect(&mkit).unwrap();
564 }
565
566 #[test]
567 fn pick_midpoint_returns_middle_element() {
568 let c1 = hash::hash(b"c1");
569 let c2 = hash::hash(b"c2");
570 let c3 = hash::hash(b"c3");
571 let c4 = hash::hash(b"c4");
572 let c5 = hash::hash(b"c5");
573 assert_eq!(pick_midpoint(&[c1, c2, c3, c4, c5]), c3);
575 assert_eq!(pick_midpoint(&[c1, c2, c3, c4]), c3);
577 assert_eq!(pick_midpoint(&[c1]), c1);
578 }
579
580 #[test]
581 fn pick_midpoint_empty_returns_zero() {
582 assert_eq!(pick_midpoint(&[]), ZERO);
583 }
584
585 #[test]
586 fn enumerate_range_on_linear_chain() {
587 let (_d, store) = fresh_store();
588 let blob = put_blob(&store, b"data");
589 let tree = put_tree(&store, "f.txt", blob);
590 let mut commits = Vec::new();
591 commits.push(put_commit(&store, tree, vec![], 1));
592 for i in 1..8 {
593 let parent = commits[i - 1];
594 commits.push(put_commit(&store, tree, vec![parent], (i + 1) as u64));
595 }
596 let cands = enumerate_range(&store, commits[7], &[commits[1]]).unwrap();
599 assert_eq!(cands.len(), 6);
600 assert_eq!(cands[0], commits[7]);
601 }
602
603 #[test]
604 fn enumerate_range_with_diamond_merge() {
605 let (_d, store) = fresh_store();
606 let blob = put_blob(&store, b"d");
607 let tree = put_tree(&store, "f.txt", blob);
608 let c1 = put_commit(&store, tree, vec![], 1);
609 let c2 = put_commit(&store, tree, vec![c1], 2);
610 let c3 = put_commit(&store, tree, vec![c1], 3);
611 let c4 = put_commit(&store, tree, vec![c2, c3], 4);
612 let cands = enumerate_range(&store, c4, &[c1]).unwrap();
613 assert_eq!(cands.len(), 3);
614 assert_eq!(cands[0], c4);
615 }
616
617 #[test]
618 fn enumerate_range_same_good_and_bad_yields_empty() {
619 let (_d, store) = fresh_store();
620 let blob = put_blob(&store, b"d");
621 let tree = put_tree(&store, "f.txt", blob);
622 let c1 = put_commit(&store, tree, vec![], 1);
623 let cands = enumerate_range(&store, c1, &[c1]).unwrap();
624 assert!(cands.is_empty());
625 }
626
627 #[test]
628 fn next_step_need_more_when_no_bad() {
629 let (_d, store) = fresh_store();
630 let state = BisectState {
631 orig_head: ZERO,
632 orig_branch: None,
633 bad_hash: None,
634 good_hashes: vec![hash::hash(b"x")],
635 skipped: BTreeSet::new(),
636 };
637 assert_eq!(next_step(&store, &state).unwrap(), BisectStep::NeedMore);
638 }
639
640 #[test]
641 fn next_step_drives_bisect_to_completion() {
642 let (_d, store) = fresh_store();
644 let blob = put_blob(&store, b"data");
645 let tree = put_tree(&store, "f.txt", blob);
646 let mut commits = Vec::new();
647 commits.push(put_commit(&store, tree, vec![], 1));
648 for i in 1..6 {
649 let parent = commits[i - 1];
650 commits.push(put_commit(&store, tree, vec![parent], (i + 1) as u64));
651 }
652 let mut state = BisectState {
654 orig_head: commits[5],
655 orig_branch: None,
656 bad_hash: Some(commits[5]),
657 good_hashes: vec![commits[0]],
658 skipped: BTreeSet::new(),
659 };
660 let truth = commits[3];
663
664 let mut iters = 0;
665 loop {
666 assert!(iters < 8, "bisect should converge within 8 iterations");
667 iters += 1;
668 match next_step(&store, &state).unwrap() {
669 BisectStep::Found(h) => {
670 assert_eq!(h, truth);
671 break;
672 }
673 BisectStep::Testing { hash, .. } => {
674 let idx = commits.iter().position(|c| *c == hash).unwrap();
677 let truth_idx = commits.iter().position(|c| *c == truth).unwrap();
678 if idx < truth_idx {
679 state.good_hashes.push(hash);
680 } else {
681 state.bad_hash = Some(hash);
682 }
683 }
684 BisectStep::NeedMore => panic!("unexpected NeedMore"),
685 }
686 }
687 }
688
689 #[test]
690 fn skip_advances_midpoint_to_neighbor() {
691 let (_d, store) = fresh_store();
695 let blob = put_blob(&store, b"data");
696 let tree = put_tree(&store, "f.txt", blob);
697 let mut commits = Vec::new();
698 commits.push(put_commit(&store, tree, vec![], 1)); for i in 1..6 {
700 let parent = commits[i - 1];
701 commits.push(put_commit(&store, tree, vec![parent], (i + 1) as u64));
702 }
703 let cands = enumerate_range(&store, commits[5], &[commits[0]]).unwrap();
705 let natural_mid = pick_midpoint(&cands);
707 assert_ne!(natural_mid, ZERO, "should have a natural midpoint");
708
709 let mut skipped = BTreeSet::new();
711 skipped.insert(natural_mid);
712
713 let skipped_mid = pick_midpoint_skip(&cands, &skipped);
714 assert_ne!(
715 skipped_mid, ZERO,
716 "should find a neighbor when mid is skipped"
717 );
718 assert_ne!(
719 skipped_mid, natural_mid,
720 "skip must advance past the natural midpoint"
721 );
722 assert!(
723 !skipped.contains(&skipped_mid),
724 "returned hash must not be in skipped set"
725 );
726 }
727
728 #[test]
729 fn next_step_skip_advances_then_finds() {
730 let (_d, store) = fresh_store();
733 let blob = put_blob(&store, b"data");
734 let tree = put_tree(&store, "f.txt", blob);
735 let mut commits = Vec::new();
736 commits.push(put_commit(&store, tree, vec![], 1));
737 for i in 1..6 {
738 let p = commits[i - 1];
739 commits.push(put_commit(&store, tree, vec![p], (i + 1) as u64));
740 }
741 let truth = commits[3]; let cands = enumerate_range(&store, commits[5], &[commits[0]]).unwrap();
745 let first_natural = pick_midpoint(&cands);
746 let mut skipped = BTreeSet::new();
747 skipped.insert(first_natural);
748
749 let mut state = BisectState {
750 orig_head: commits[5],
751 orig_branch: None,
752 bad_hash: Some(commits[5]),
753 good_hashes: vec![commits[0]],
754 skipped,
755 };
756
757 let mut iters = 0;
758 loop {
759 assert!(iters < 10, "bisect with skip should converge");
760 iters += 1;
761 match next_step(&store, &state).unwrap() {
762 BisectStep::Found(_) => break,
763 BisectStep::Testing { hash, .. } => {
764 let idx = commits.iter().position(|c| *c == hash).unwrap();
765 let truth_idx = commits.iter().position(|c| *c == truth).unwrap();
766 if idx < truth_idx {
767 state.good_hashes.push(hash);
768 } else {
769 state.bad_hash = Some(hash);
770 }
771 }
772 BisectStep::NeedMore => panic!("unexpected NeedMore"),
773 }
774 }
775 }
776
777 #[test]
778 fn pick_midpoint_skip_all_skipped_returns_zero() {
779 let c1 = hash::hash(b"c1");
780 let c2 = hash::hash(b"c2");
781 let c3 = hash::hash(b"c3");
782 let mut skipped = BTreeSet::new();
783 skipped.insert(c1);
784 skipped.insert(c2);
785 skipped.insert(c3);
786 assert_eq!(pick_midpoint_skip(&[c1, c2, c3], &skipped), ZERO);
787 }
788}