1use std::fs;
28use std::io;
29use std::path::{Path, PathBuf};
30
31use crate::atomic::write_atomic;
32use crate::hash::{HASH_LEN, Hash};
33use crate::object::{EntryMode, Object};
34use crate::store::{MAX_TREE_DEPTH, ObjectStore, StoreError};
35
/// Four-byte magic that opens every serialized index; checked first by `deserialize`.
pub const MAGIC: [u8; 4] = *b"MKIX";
/// Current format version. This is the version byte `Index::serialize` writes; entries in
/// this version carry four little-endian `u64` stat-cache fields.
pub const FORMAT_VERSION: u8 = 0x02;
/// Older format version that `deserialize` still accepts. Its entries have no stat-cache
/// fields, so those fields are read back as zero.
pub const FORMAT_VERSION_V1: u8 = 0x01;
/// Largest index file, in bytes (64 MiB), that `read_index` will load.
pub const MAX_INDEX_BYTES: u64 = 64 * 1024 * 1024;
/// Longest entry path, in bytes, accepted by `deserialize` and `validate_index_path`.
pub const MAX_PATH_LEN: usize = 4096;

/// Location of the index file relative to the root directory handed to
/// `read_index`, `write_index` and `index_path`.
pub const INDEX_FILE: &str = ".mkit/index";
50
/// Kind of an index entry, stored on disk as a single byte.
///
/// The explicit discriminants are the on-disk encoding: `serialize` writes `status as u8`
/// and [`EntryStatus::from_byte`] performs the reverse mapping.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntryStatus {
    /// The path is recorded as removed; such entries are excluded from the "tracked" queries.
    Removed = 0x00,
    /// Regular file.
    Blob = 0x01,
    /// Tree (directory) entry.
    Tree = 0x02,
    /// Symbolic link.
    Symlink = 0x03,
    /// Executable file.
    Executable = 0x04,
}

impl EntryStatus {
    /// Decodes a status byte, returning `None` for any value outside `0x00..=0x04`.
    #[must_use]
    pub fn from_byte(b: u8) -> Option<Self> {
        // Indexed by discriminant, so the array order must mirror the enum definition.
        const BY_DISCRIMINANT: [EntryStatus; 5] = [
            EntryStatus::Removed,
            EntryStatus::Blob,
            EntryStatus::Tree,
            EntryStatus::Symlink,
            EntryStatus::Executable,
        ];
        BY_DISCRIMINANT.get(usize::from(b)).copied()
    }
}
81
/// One record of the index: a path, its kind, the object it points at, and a stat cache.
///
/// The four numeric fields form the stat cache stored by format version 2. They are all
/// zero for entries read from a version-1 file, for entries produced by `from_tree`, and
/// for entries that `read_index` judged racy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexEntry {
    /// `/`-separated relative path; `deserialize` only accepts paths that pass
    /// `validate_index_path`.
    pub path: String,
    /// Kind of the entry (or `Removed`).
    pub status: EntryStatus,
    /// Hash of the object this entry refers to.
    pub object_hash: Hash,
    /// Cached modification time in nanoseconds. `read_index` skips its racy check when this
    /// is `0`, and resets it to `0` when the entry is judged racy.
    pub mtime_ns: u64,
    /// Cached size — presumably the file size in bytes; confirm against the writer.
    pub size: u64,
    /// Cached inode number — presumably from the file's metadata; confirm against the writer.
    pub ino: u64,
    /// Cached change time in nanoseconds — presumably the file's ctime; confirm against the
    /// writer.
    pub ctime_ns: u64,
}
107
/// In-memory form of the index file: a flat list of entries.
///
/// The struct itself does not enforce path uniqueness; `deserialize` rejects serialized
/// data that contains the same path twice.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Index {
    /// Entries in the order they were added / read.
    pub entries: Vec<IndexEntry>,
}
114
115impl Index {
116 #[must_use]
118 pub const fn new() -> Self {
119 Self {
120 entries: Vec::new(),
121 }
122 }
123
124 #[must_use]
126 pub fn find_entry(&self, path: &str) -> Option<usize> {
127 self.entries.iter().position(|e| e.path == path)
128 }
129
130 #[must_use]
135 pub fn tracks_path_or_descendant(&self, path: &str) -> bool {
136 self.entries.iter().any(|e| {
137 e.status != EntryStatus::Removed
138 && (e.path == path
139 || (e.path.len() > path.len()
140 && e.path.starts_with(path)
141 && e.path.as_bytes().get(path.len()) == Some(&b'/')))
142 })
143 }
144
145 #[must_use]
156 pub fn has_tracked_file_at(&self, path: &str) -> bool {
157 self.find_entry(path)
158 .is_some_and(|i| self.entries[i].status != EntryStatus::Removed)
159 }
160
161 #[must_use]
163 pub fn staged_count(&self) -> usize {
164 self.entries
165 .iter()
166 .filter(|e| e.status != EntryStatus::Removed)
167 .count()
168 }
169
170 #[must_use]
176 pub fn serialize(&self) -> Vec<u8> {
177 let body: usize = self
180 .entries
181 .iter()
182 .map(|e| 1 + HASH_LEN + 8 + 8 + 8 + 8 + 2 + e.path.len())
183 .sum();
184 let mut out = Vec::with_capacity(9 + body);
185 out.extend_from_slice(&MAGIC);
186 out.push(FORMAT_VERSION);
187 let count = u32::try_from(self.entries.len()).expect("index entry count fits in u32");
188 out.extend_from_slice(&count.to_le_bytes());
189 for entry in &self.entries {
190 out.push(entry.status as u8);
191 out.extend_from_slice(&entry.object_hash);
192 out.extend_from_slice(&entry.mtime_ns.to_le_bytes());
193 out.extend_from_slice(&entry.size.to_le_bytes());
194 out.extend_from_slice(&entry.ino.to_le_bytes());
195 out.extend_from_slice(&entry.ctime_ns.to_le_bytes());
196 let path_len =
197 u16::try_from(entry.path.len()).expect("index entry path length fits in u16");
198 out.extend_from_slice(&path_len.to_le_bytes());
199 out.extend_from_slice(entry.path.as_bytes());
200 }
201 out
202 }
203}
204
/// Everything that can go wrong while reading, decoding or building an index.
#[derive(Debug, thiserror::Error)]
pub enum IndexError {
    /// The first four bytes are not `MAGIC`.
    #[error("index file has wrong magic (expected MKIX)")]
    BadMagic,
    /// The version byte is neither `FORMAT_VERSION` nor `FORMAT_VERSION_V1`; carries the byte.
    #[error("unsupported index version: {0:#x}")]
    UnsupportedVersion(u8),
    /// An entry's status byte is not recognised by `EntryStatus::from_byte`; carries the byte.
    #[error("index entry has unknown status byte {0:#x}")]
    BadStatus(u8),
    /// The data is structurally inconsistent: truncated, carrying trailing bytes, or with
    /// length/count fields that do not fit the buffer.
    #[error("index file is corrupt")]
    Corrupt,
    /// The index file on disk is larger than `MAX_INDEX_BYTES`.
    #[error("index file too large (>{MAX_INDEX_BYTES} bytes)")]
    TooLarge,
    /// A path was rejected by `validate_index_path`; carries the offending path.
    #[error("invalid index path '{0}'")]
    InvalidPath(String),
    /// The same path appears more than once in serialized data; carries the path.
    #[error("duplicate index path '{0}'")]
    DuplicatePath(String),
    /// A path (or a tree entry name) is not valid UTF-8.
    #[error("index path is not valid UTF-8")]
    InvalidPathEncoding,
    /// Underlying I/O failure.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// Failure reported by the object store.
    #[error(transparent)]
    Store(#[from] StoreError),
    /// An object that was expected to be a tree turned out to be something else.
    #[error("object is not a tree")]
    NotTree,
    /// Tree recursion went deeper than `MAX_TREE_DEPTH`.
    #[error("tree nesting exceeds {} levels", MAX_TREE_DEPTH)]
    TreeTooDeep,
}

/// Result alias used by every fallible function in this module.
pub type IndexResult<T> = Result<T, IndexError>;
249
250pub fn deserialize(data: &[u8]) -> IndexResult<Index> {
259 if data.len() < 9 {
260 return Err(IndexError::Corrupt);
261 }
262 if data[0..4] != MAGIC {
263 return Err(IndexError::BadMagic);
264 }
265 let version = data[4];
266 if version != FORMAT_VERSION && version != FORMAT_VERSION_V1 {
267 return Err(IndexError::UnsupportedVersion(version));
268 }
269 let stat_cache_len: usize = if version == FORMAT_VERSION { 32 } else { 0 };
272 let min_entry_len = 1 + HASH_LEN + stat_cache_len + 2;
274 let count = u32::from_le_bytes([data[5], data[6], data[7], data[8]]) as usize;
275 if (count as u64).saturating_mul(min_entry_len as u64) > data.len() as u64 {
283 return Err(IndexError::Corrupt);
284 }
285 let mut entries = Vec::with_capacity(count.min(1024)); let mut seen_paths = std::collections::HashSet::with_capacity(count.min(1024));
287 let mut offset = 9usize;
288 for _ in 0..count {
289 if offset + min_entry_len > data.len() {
290 return Err(IndexError::Corrupt);
291 }
292 let status =
293 EntryStatus::from_byte(data[offset]).ok_or(IndexError::BadStatus(data[offset]))?;
294 offset += 1;
295 let mut object_hash = [0u8; HASH_LEN];
296 object_hash.copy_from_slice(&data[offset..offset + HASH_LEN]);
297 offset += HASH_LEN;
298 let (mtime_ns, size, ino, ctime_ns) = if version == FORMAT_VERSION {
300 let mut next_u64 = || {
301 let v = u64::from_le_bytes(data[offset..offset + 8].try_into().expect("8 bytes"));
302 offset += 8;
303 v
304 };
305 (next_u64(), next_u64(), next_u64(), next_u64())
306 } else {
307 (0, 0, 0, 0)
308 };
309 let path_len = u16::from_le_bytes([data[offset], data[offset + 1]]) as usize;
310 offset += 2;
311 if path_len > MAX_PATH_LEN {
312 return Err(IndexError::Corrupt);
313 }
314 if offset + path_len > data.len() {
315 return Err(IndexError::Corrupt);
316 }
317 let path_bytes = &data[offset..offset + path_len];
318 let path = core::str::from_utf8(path_bytes)
319 .map_err(|_| IndexError::InvalidPathEncoding)?
320 .to_string();
321 offset += path_len;
322 if !validate_index_path(&path) {
323 return Err(IndexError::InvalidPath(path));
324 }
325 if !seen_paths.insert(path.clone()) {
326 return Err(IndexError::DuplicatePath(path));
327 }
328 entries.push(IndexEntry {
329 path,
330 status,
331 object_hash,
332 mtime_ns,
333 size,
334 ino,
335 ctime_ns,
336 });
337 }
338 if offset != data.len() {
339 return Err(IndexError::Corrupt);
340 }
341 Ok(Index { entries })
342}
343
344pub fn read_index(root: &Path) -> IndexResult<Index> {
347 let path = root.join(INDEX_FILE);
348 let meta = match fs::metadata(&path) {
349 Ok(m) => m,
350 Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(Index::new()),
351 Err(e) => return Err(IndexError::Io(e)),
352 };
353 if meta.len() == 0 {
354 return Ok(Index::new());
355 }
356 if meta.len() > MAX_INDEX_BYTES {
357 return Err(IndexError::TooLarge);
358 }
359 let bytes = fs::read(&path)?;
360 let mut idx = deserialize(&bytes)?;
361 let index_mtime_ns = crate::worktree::mtime_nanos(&meta);
371 let index_ns_precise = !index_mtime_ns.is_multiple_of(1_000_000_000);
379 for e in &mut idx.entries {
380 if e.mtime_ns == 0 {
381 continue;
382 }
383 let window = if index_ns_precise && !e.mtime_ns.is_multiple_of(1_000_000_000) {
384 RACY_WINDOW_NS / 100
385 } else {
386 RACY_WINDOW_NS
387 };
388 if e.mtime_ns >= index_mtime_ns.saturating_sub(window) {
389 e.mtime_ns = 0;
390 e.size = 0;
391 e.ino = 0;
392 e.ctime_ns = 0;
393 }
394 }
395 Ok(idx)
396}
397
398const RACY_WINDOW_NS: u64 = 1_000_000_000;
405
406pub fn write_index(root: &Path, idx: &Index) -> IndexResult<()> {
416 let path = root.join(INDEX_FILE);
417 write_atomic(&path, &idx.serialize(), true)?;
418 Ok(())
419}
420
421pub fn from_tree(store: &ObjectStore, tree_hash: Hash) -> IndexResult<Index> {
432 let mut idx = Index::new();
433 push_tree_entries(store, tree_hash, "", &mut idx, 0)?;
434 Ok(idx)
435}
436
437fn push_tree_entries(
438 store: &ObjectStore,
439 tree_hash: Hash,
440 prefix: &str,
441 idx: &mut Index,
442 depth: usize,
443) -> IndexResult<()> {
444 if depth > MAX_TREE_DEPTH {
445 return Err(IndexError::TreeTooDeep);
446 }
447 let Object::Tree(tree) = store.read_object(&tree_hash)? else {
448 return Err(IndexError::NotTree);
449 };
450 for entry in tree.entries {
451 let name = String::from_utf8(entry.name).map_err(|_| IndexError::InvalidPathEncoding)?;
452 let path = if prefix.is_empty() {
453 name
454 } else {
455 format!("{prefix}/{name}")
456 };
457 match entry.mode {
458 EntryMode::Tree => {
459 push_tree_entries(store, entry.object_hash, &path, idx, depth + 1)?;
460 }
461 EntryMode::Blob | EntryMode::Executable | EntryMode::Symlink => {
462 if !validate_index_path(&path) {
463 return Err(IndexError::InvalidPath(path));
464 }
465 let status = match entry.mode {
466 EntryMode::Blob => EntryStatus::Blob,
467 EntryMode::Executable => EntryStatus::Executable,
468 EntryMode::Symlink => EntryStatus::Symlink,
469 EntryMode::Tree => unreachable!("handled above"),
470 };
471 idx.entries.push(IndexEntry {
472 path,
473 status,
474 object_hash: entry.object_hash,
475 mtime_ns: 0,
478 size: 0,
479 ino: 0,
480 ctime_ns: 0,
481 });
482 }
483 }
484 }
485 Ok(())
486}
487
488#[must_use]
490pub fn index_path(root: &Path) -> PathBuf {
491 root.join(INDEX_FILE)
492}
493
494#[must_use]
497pub fn validate_index_path(path: &str) -> bool {
498 if path.is_empty() {
499 return false;
500 }
501 if path.starts_with('/') {
502 return false;
503 }
504 if path.len() > MAX_PATH_LEN {
505 return false;
506 }
507 if path == ".mkit" || path == ".git" {
508 return false;
509 }
510 if path.starts_with(".mkit/") || path.starts_with(".git/") {
511 return false;
512 }
513 for part in path.split('/') {
514 if part.is_empty() {
515 return false;
516 }
517 if part == "." || part == ".." {
518 return false;
519 }
520 for &c in part.as_bytes() {
521 if c == 0 || c == b'\\' {
522 return false;
523 }
524 }
525 }
526 true
527}
528
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hash;
    use crate::object::{Blob, EntryMode, Object, Tree, TreeEntry};
    use crate::store::ObjectStore;
    use tempfile::TempDir;

    /// Deterministic object hash derived from a short label.
    fn seed_hash(s: &str) -> Hash {
        hash::hash(s.as_bytes())
    }

    /// Entry with the given identity and an all-zero stat cache.
    fn entry(path: &str, status: EntryStatus, object_hash: Hash) -> IndexEntry {
        IndexEntry {
            path: path.to_string(),
            status,
            object_hash,
            mtime_ns: 0,
            size: 0,
            ino: 0,
            ctime_ns: 0,
        }
    }

    /// Blob entry carrying an explicit cached mtime and size.
    fn cached_blob(path: &str, label: &str, mtime_ns: u64, size: u64) -> IndexEntry {
        IndexEntry {
            mtime_ns,
            size,
            ..entry(path, EntryStatus::Blob, seed_hash(label))
        }
    }

    /// Serialized header: magic, version byte, little-endian entry count.
    fn header(version: u8, count: u32) -> Vec<u8> {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&MAGIC);
        bytes.push(version);
        bytes.extend_from_slice(&count.to_le_bytes());
        bytes
    }

    /// Header for one current-version entry followed by that entry's fixed-size prefix
    /// (status byte, zero hash, zero stat cache); the caller appends path length and path.
    fn v2_single_entry_prefix(status: u8) -> Vec<u8> {
        let mut bytes = header(FORMAT_VERSION, 1);
        bytes.push(status);
        bytes.extend_from_slice(&[0u8; HASH_LEN]);
        bytes.extend_from_slice(&[0u8; 32]);
        bytes
    }

    /// Sets the mtime of the index file under `root` to `mtime_ns` nanoseconds after the epoch.
    fn set_index_mtime(root: &Path, mtime_ns: u64) {
        let f = fs::File::options()
            .write(true)
            .open(index_path(root))
            .unwrap();
        f.set_times(
            fs::FileTimes::new()
                .set_modified(std::time::UNIX_EPOCH + std::time::Duration::from_nanos(mtime_ns)),
        )
        .unwrap();
    }

    /// Serializes `obj` and stores it, returning its hash.
    fn put(store: &ObjectStore, obj: &Object) -> Hash {
        let bytes = crate::serialize::serialize(obj).unwrap();
        store.write(&bytes).unwrap()
    }

    /// Stores a blob with the given contents.
    fn put_blob(store: &ObjectStore, data: &[u8]) -> Hash {
        put(
            store,
            &Object::Blob(Blob {
                data: data.to_vec(),
            }),
        )
    }

    /// Tree entry with the given name, mode and target.
    fn tree_entry(name: &str, mode: EntryMode, object_hash: Hash) -> TreeEntry {
        TreeEntry {
            name: name.as_bytes().to_vec(),
            mode,
            object_hash,
        }
    }

    #[test]
    fn empty_index_round_trip() {
        let idx = Index::new();
        let bytes = idx.serialize();
        assert_eq!(bytes.len(), 9);
        assert_eq!(&bytes[0..4], &MAGIC);
        assert_eq!(bytes[4], FORMAT_VERSION);
        assert_eq!(&bytes[5..9], &0u32.to_le_bytes());
        let parsed = deserialize(&bytes).unwrap();
        assert_eq!(parsed, idx);
    }

    #[test]
    fn v2_single_entry_pinned_bytes() {
        let h = seed_hash("hello");
        let idx = Index {
            entries: vec![IndexEntry {
                path: "hello.txt".to_string(),
                status: EntryStatus::Blob,
                object_hash: h,
                mtime_ns: 0x0102_0304_0506_0708,
                size: 11,
                ino: 0x0A0B_0C0D_0E0F_1011,
                ctime_ns: 0x1112_1314_1516_1718,
            }],
        };
        let bytes = idx.serialize();
        assert_eq!(bytes.len(), 85);
        // Spelled out literally on purpose: this test pins the byte layout.
        let mut expected = Vec::new();
        expected.extend_from_slice(b"MKIX");
        expected.push(0x02);
        expected.extend_from_slice(&1u32.to_le_bytes());
        expected.push(0x01);
        expected.extend_from_slice(&h);
        expected.extend_from_slice(&0x0102_0304_0506_0708u64.to_le_bytes());
        expected.extend_from_slice(&11u64.to_le_bytes());
        expected.extend_from_slice(&0x0A0B_0C0D_0E0F_1011u64.to_le_bytes());
        expected.extend_from_slice(&0x1112_1314_1516_1718u64.to_le_bytes());
        expected.extend_from_slice(&9u16.to_le_bytes());
        expected.extend_from_slice(b"hello.txt");
        assert_eq!(bytes, expected, "v2 byte layout is pinned");
        assert_eq!(deserialize(&bytes).unwrap(), idx);
    }

    #[test]
    fn reads_v1_index_with_zeroed_stat_cache() {
        let h = seed_hash("hello");
        let mut v1 = Vec::new();
        v1.extend_from_slice(b"MKIX");
        v1.push(0x01);
        v1.extend_from_slice(&1u32.to_le_bytes());
        v1.push(0x01);
        v1.extend_from_slice(&h);
        v1.extend_from_slice(&9u16.to_le_bytes());
        v1.extend_from_slice(b"hello.txt");
        assert_eq!(v1.len(), 53);

        let parsed = deserialize(&v1).unwrap();
        assert_eq!(parsed.entries.len(), 1);
        let e = &parsed.entries[0];
        assert_eq!(e.path, "hello.txt");
        assert_eq!(e.object_hash, h);
        assert_eq!(e.mtime_ns, 0, "v1 entries carry no stat cache");
        assert_eq!(e.size, 0);
        assert_eq!(e.ino, 0);
        assert_eq!(e.ctime_ns, 0);
    }

    #[test]
    fn rejects_v2_count_overflow_at_min_entry_bytes() {
        // A count that cannot possibly fit the buffer.
        let bytes = header(0x02, u32::MAX);
        assert!(matches!(deserialize(&bytes), Err(IndexError::Corrupt)));
        // One declared entry, but fewer payload bytes than the smallest v2 entry needs.
        let mut short = header(0x02, 1);
        short.extend_from_slice(&[0u8; 60]);
        assert!(matches!(deserialize(&short), Err(IndexError::Corrupt)));
    }

    #[test]
    fn rejects_unknown_version_0x03() {
        let bytes = header(0x03, 0);
        assert!(matches!(
            deserialize(&bytes),
            Err(IndexError::UnsupportedVersion(0x03))
        ));
    }

    #[test]
    fn read_index_invalidates_racy_entries() {
        let dir = TempDir::new().unwrap();
        let now_ns = u64::try_from(
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_nanos(),
        )
        .unwrap();
        let settled_ns = now_ns - 10_000_000_000;
        let idx = Index {
            entries: vec![
                cached_blob("racy.txt", "racy", now_ns, 4),
                cached_blob("settled.txt", "settled", settled_ns, 7),
            ],
        };
        write_index(dir.path(), &idx).unwrap();
        // Pin the index mtime so the comparison does not depend on how long the write took.
        set_index_mtime(dir.path(), now_ns);

        let read = read_index(dir.path()).unwrap();
        let racy = &read.entries[read.find_entry("racy.txt").unwrap()];
        let settled = &read.entries[read.find_entry("settled.txt").unwrap()];
        assert_eq!(
            racy.mtime_ns, 0,
            "an entry touched within the racy window must lose its cache"
        );
        assert_eq!(racy.size, 0);
        assert_eq!(settled.mtime_ns, settled_ns);
        assert_eq!(settled.size, 7);
    }

    #[test]
    fn coarse_entry_mtime_keeps_one_second_window() {
        let dir = TempDir::new().unwrap();
        let base_ns: u64 = 1_700_000_000_000_000_000;
        let idx = Index {
            entries: vec![
                // Whole-second mtime: judged with the full one-second window.
                cached_blob("coarse.txt", "coarse", base_ns - 1_000_000_000, 4),
                // Sub-second mtime: judged with the narrow window.
                cached_blob("precise.txt", "precise", base_ns - 1_000_000_000 + 123, 7),
            ],
        };
        write_index(dir.path(), &idx).unwrap();
        set_index_mtime(dir.path(), base_ns - 500_000_000 + 777);

        let read = read_index(dir.path()).unwrap();
        let coarse = &read.entries[read.find_entry("coarse.txt").unwrap()];
        let precise = &read.entries[read.find_entry("precise.txt").unwrap()];
        assert_eq!(
            coarse.mtime_ns, 0,
            "coarse-mtime entry within 1s of the index write must be racy"
        );
        assert_ne!(
            precise.mtime_ns, 0,
            "ns-precise entry outside the 10ms window keeps its cache"
        );
    }

    #[test]
    fn tracks_path_or_descendant_matches_self_and_ancestors() {
        let idx = Index {
            entries: vec![
                entry("src/lib.rs", EntryStatus::Blob, seed_hash("lib")),
                entry("removed.txt", EntryStatus::Removed, hash::ZERO),
            ],
        };
        assert!(idx.tracks_path_or_descendant("src/lib.rs"));
        assert!(idx.tracks_path_or_descendant("src"));
        // A bare string prefix that is not a whole component must not match.
        assert!(!idx.tracks_path_or_descendant("sr"));
        assert!(!idx.tracks_path_or_descendant("docs"));
        assert!(!idx.tracks_path_or_descendant("removed.txt"));
    }

    #[test]
    fn has_tracked_file_at_exact_only_and_not_removed() {
        let mut idx = Index {
            entries: vec![
                entry("f", EntryStatus::Blob, seed_hash("f")),
                entry("gone", EntryStatus::Removed, hash::ZERO),
            ],
        };
        assert!(idx.has_tracked_file_at("f"));
        idx.entries
            .push(entry("dir/inner.txt", EntryStatus::Blob, seed_hash("inner")));
        assert!(!idx.has_tracked_file_at("dir"));
        assert!(idx.has_tracked_file_at("dir/inner.txt"));
        assert!(!idx.has_tracked_file_at("gone"));
        assert!(!idx.has_tracked_file_at("other"));
    }

    #[test]
    fn single_entry_round_trip() {
        let idx = Index {
            entries: vec![entry("README.md", EntryStatus::Blob, seed_hash("readme"))],
        };
        let bytes = idx.serialize();
        assert_eq!(bytes.len(), 85);
        let parsed = deserialize(&bytes).unwrap();
        assert_eq!(parsed, idx);
    }

    #[test]
    fn multi_entry_round_trip_with_all_statuses() {
        let idx = Index {
            entries: vec![
                entry("a.txt", EntryStatus::Blob, seed_hash("a")),
                entry("b/sub", EntryStatus::Tree, seed_hash("b")),
                entry("c.link", EntryStatus::Symlink, seed_hash("c")),
                entry("scripts/build", EntryStatus::Executable, seed_hash("d")),
                entry("old.txt", EntryStatus::Removed, [0u8; HASH_LEN]),
            ],
        };
        let bytes = idx.serialize();
        let parsed = deserialize(&bytes).unwrap();
        assert_eq!(parsed, idx);
    }

    #[test]
    fn rejects_bad_magic() {
        let mut bytes = Index::new().serialize();
        bytes[0] = b'X';
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::BadMagic));
    }

    #[test]
    fn rejects_zmix_magic_explicitly() {
        // 0x5A 0x4D 0x49 0x58 spells "ZMIX".
        let bytes = [0x5A, 0x4D, 0x49, 0x58, FORMAT_VERSION, 0, 0, 0, 0];
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::BadMagic));
    }

    #[test]
    fn rejects_unsupported_version() {
        let mut bytes = Index::new().serialize();
        bytes[4] = 0xFF;
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::UnsupportedVersion(0xFF)));
    }

    #[test]
    fn rejects_truncated_header() {
        let err = deserialize(b"MKIX").unwrap_err();
        assert!(matches!(err, IndexError::Corrupt));
    }

    #[test]
    fn rejects_truncated_entry() {
        let idx = Index {
            entries: vec![entry("a", EntryStatus::Blob, seed_hash("a"))],
        };
        let mut bytes = idx.serialize();
        bytes.truncate(bytes.len() - 1);
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::Corrupt));
    }

    #[test]
    fn rejects_trailing_bytes_after_declared_entries() {
        let idx = Index {
            entries: vec![entry("a", EntryStatus::Blob, seed_hash("a"))],
        };
        let mut bytes = idx.serialize();
        bytes.extend_from_slice(b"junk");
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::Corrupt));
    }

    #[test]
    fn rejects_invalid_path_on_deserialize() {
        let mut bytes = v2_single_entry_prefix(EntryStatus::Blob as u8);
        let raw_path = b"../escape";
        let path_len = u16::try_from(raw_path.len()).unwrap();
        bytes.extend_from_slice(&path_len.to_le_bytes());
        bytes.extend_from_slice(raw_path);
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::InvalidPath(path) if path == "../escape"));
    }

    #[test]
    fn rejects_duplicate_paths_on_deserialize() {
        let idx = Index {
            entries: vec![
                entry("same.txt", EntryStatus::Blob, seed_hash("a")),
                entry("same.txt", EntryStatus::Executable, seed_hash("b")),
            ],
        };
        let err = deserialize(&idx.serialize()).unwrap_err();
        assert!(matches!(err, IndexError::DuplicatePath(path) if path == "same.txt"));
    }

    #[test]
    fn rejects_path_len_overflow() {
        // The entry carries its full fixed-size prefix (including the 32-byte stat cache),
        // so the input passes the entry-count pre-check and is rejected by the path-length
        // checks themselves.

        // Declared path length runs past the end of the buffer.
        let mut bytes = v2_single_entry_prefix(EntryStatus::Blob as u8);
        bytes.extend_from_slice(&1000u16.to_le_bytes());
        bytes.push(b'a');
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::Corrupt));

        // Path bytes are all present, but the length exceeds MAX_PATH_LEN.
        let too_long = MAX_PATH_LEN + 1;
        let mut bytes = v2_single_entry_prefix(EntryStatus::Blob as u8);
        bytes.extend_from_slice(&u16::try_from(too_long).unwrap().to_le_bytes());
        bytes.extend(std::iter::repeat(b'a').take(too_long));
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::Corrupt));
    }

    #[test]
    fn rejects_unknown_status_byte() {
        let mut bytes = v2_single_entry_prefix(0x77);
        bytes.extend_from_slice(&0u16.to_le_bytes());
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::BadStatus(0x77)));
    }

    #[test]
    fn write_and_read_round_trip_via_disk() {
        let dir = TempDir::new().unwrap();
        fs::create_dir_all(dir.path().join(".mkit")).unwrap();
        let idx = Index {
            entries: vec![entry("test.txt", EntryStatus::Blob, seed_hash("c"))],
        };
        write_index(dir.path(), &idx).unwrap();
        let read = read_index(dir.path()).unwrap();
        assert_eq!(read, idx);
    }

    #[test]
    fn read_missing_file_returns_empty_index() {
        let dir = TempDir::new().unwrap();
        let idx = read_index(dir.path()).unwrap();
        assert!(idx.entries.is_empty());
    }

    #[test]
    fn read_zero_length_file_returns_empty_index() {
        let dir = TempDir::new().unwrap();
        fs::create_dir_all(dir.path().join(".mkit")).unwrap();
        fs::write(dir.path().join(INDEX_FILE), b"").unwrap();
        let idx = read_index(dir.path()).unwrap();
        assert!(idx.entries.is_empty());
    }

    #[test]
    fn read_oversize_file_rejected() {
        let dir = TempDir::new().unwrap();
        fs::create_dir_all(dir.path().join(".mkit")).unwrap();
        let path = dir.path().join(INDEX_FILE);
        let f = fs::OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .open(&path)
            .unwrap();
        // Only the length is extended; no data is written.
        f.set_len(MAX_INDEX_BYTES + 1).unwrap();
        drop(f);
        let err = read_index(dir.path()).unwrap_err();
        assert!(matches!(err, IndexError::TooLarge));
    }

    #[test]
    fn staged_count_excludes_removed() {
        let idx = Index {
            entries: vec![
                entry("a", EntryStatus::Blob, seed_hash("a")),
                entry("b", EntryStatus::Removed, [0u8; HASH_LEN]),
                entry("c", EntryStatus::Blob, seed_hash("c")),
            ],
        };
        assert_eq!(idx.staged_count(), 2);
    }

    #[test]
    fn rejects_bogus_huge_count_before_loop() {
        let bytes = header(FORMAT_VERSION, u32::MAX);
        let err = deserialize(&bytes).unwrap_err();
        assert!(matches!(err, IndexError::Corrupt));
    }

    #[test]
    fn validate_path_basic() {
        assert!(validate_index_path("a.txt"));
        assert!(validate_index_path("src/main.rs"));
        assert!(validate_index_path(".mkitignore"));
        assert!(!validate_index_path(""));
        assert!(!validate_index_path("/abs"));
        assert!(!validate_index_path("../escape"));
        assert!(!validate_index_path("a/../b"));
        assert!(!validate_index_path(".mkit"));
        assert!(!validate_index_path(".git"));
        assert!(!validate_index_path(".mkit/objects"));
        assert!(!validate_index_path(".git/HEAD"));
        assert!(!validate_index_path("a\\b"));
        assert!(!validate_index_path("a//b"));
    }

    #[test]
    fn from_tree_flattens_tree_entries() {
        let dir = TempDir::new().unwrap();
        let store = ObjectStore::init(dir.path()).unwrap();
        let file = put_blob(&store, b"file");
        let exec = put_blob(&store, b"exec");
        let link = put_blob(&store, b"target");
        let sub = put(
            &store,
            &Object::Tree(Tree {
                entries: vec![tree_entry("run", EntryMode::Executable, exec)],
            }),
        );
        let root = put(
            &store,
            &Object::Tree(Tree {
                entries: vec![
                    tree_entry("file.txt", EntryMode::Blob, file),
                    tree_entry("link", EntryMode::Symlink, link),
                    tree_entry("sub", EntryMode::Tree, sub),
                ],
            }),
        );

        let idx = from_tree(&store, root).unwrap();
        assert_eq!(idx.entries.len(), 3);
        assert_eq!(idx.entries[0].path, "file.txt");
        assert_eq!(idx.entries[0].status, EntryStatus::Blob);
        assert_eq!(idx.entries[1].path, "link");
        assert_eq!(idx.entries[1].status, EntryStatus::Symlink);
        assert_eq!(idx.entries[2].path, "sub/run");
        assert_eq!(idx.entries[2].status, EntryStatus::Executable);
    }

    #[test]
    fn from_tree_round_trips_through_worktree_builder() {
        let dir = TempDir::new().unwrap();
        let store = ObjectStore::init(dir.path()).unwrap();
        let blob = put_blob(&store, b"content");
        let tree = put(
            &store,
            &Object::Tree(Tree {
                entries: vec![tree_entry("a.txt", EntryMode::Blob, blob)],
            }),
        );

        let idx = from_tree(&store, tree).unwrap();
        let rebuilt = crate::worktree::build_tree_from_index(&store, &idx).unwrap();
        assert_eq!(rebuilt, tree);
    }
}