Skip to main content

grit_lib/
split_index.rs

1//! Split index: `link` extension and `sharedindex.<sha1>` (Git `split-index.c`).
2
3use std::fs;
4use std::io;
5use std::path::{Path, PathBuf};
6
7use sha1::{Digest, Sha1};
8
9use crate::config::ConfigSet;
10use crate::error::{Error, Result};
11use crate::ewah_bitmap::EwahBitmap;
12use crate::git_date::approx::approxidate_careful;
13use crate::index::{Index, IndexEntry};
14use crate::objects::ObjectId;
15
16/// Split-index metadata carried on an [`Index`] (in-memory; bitmaps cleared after merge/write).
17#[derive(Debug, Clone)]
18pub(crate) struct SplitIndexLink {
19    /// OID of the shared index file (`sharedindex.<hex>`).
20    pub base_oid: ObjectId,
21    pub delete_bitmap: Option<EwahBitmap>,
22    pub replace_bitmap: Option<EwahBitmap>,
23}
24
/// Parse a `core.sharedRepository` value into the integer encoding consumed by
/// `calc_shared_perm` (Git `git_config_perm`).
///
/// Returns:
/// * `0` for "umask" (also for a missing, empty, false or unrecognised value),
/// * `0o660` for "group", the legacy value `1`, or a true boolean (`true`/`yes`/`on`),
/// * `0o664` for "all"/"world"/"everybody" or the legacy value `2`,
/// * the *negated* mode (masked with `0o666`) for any other octal file mode that grants the
///   owner read and write; modes that do not are treated as "umask".
///
/// Keyword and boolean matching is ASCII case-insensitive; surrounding whitespace is ignored.
fn parse_shared_repository_perm(raw: Option<&str>) -> i32 {
    const PERM_UMASK: i32 = 0;
    const OLD_PERM_GROUP: i32 = 1;
    const OLD_PERM_EVERYBODY: i32 = 2;
    const PERM_GROUP: i32 = 0o660;
    const PERM_EVERYBODY: i32 = 0o664;

    let Some(value) = raw.map(str::trim).filter(|s| !s.is_empty()) else {
        return PERM_UMASK;
    };
    let is_any = |names: &[&str]| names.iter().any(|name| value.eq_ignore_ascii_case(name));

    if is_any(&["umask"]) {
        return PERM_UMASK;
    }
    if is_any(&["group"]) {
        return PERM_GROUP;
    }
    if is_any(&["all", "world", "everybody"]) {
        return PERM_EVERYBODY;
    }

    // Octal number: 0/1/2 are compatibility aliases, anything else is a chmod-style mode.
    if value.bytes().all(|b| (b'0'..=b'7').contains(&b)) {
        if let Ok(mode) = i32::from_str_radix(value, 8) {
            return match mode {
                PERM_UMASK => PERM_UMASK,
                OLD_PERM_GROUP => PERM_GROUP,
                OLD_PERM_EVERYBODY => PERM_EVERYBODY,
                // The owner must always keep read and write permission.
                m if (m & 0o600) != 0o600 => PERM_UMASK,
                // Negative marks "use exactly these bits"; execute bits are handled separately.
                m => -(m & 0o666),
            };
        }
    }

    // Not a keyword or octal mode: interpret as a boolean. Git accepts `yes` and `on` as
    // spellings of true in addition to `true`; everything else maps to the umask default.
    if is_any(&["true", "yes", "on"]) {
        PERM_GROUP
    } else {
        PERM_UMASK
    }
}
70
/// Compute the file mode a path should have under the given `core.sharedRepository` setting
/// (Git `calc_shared_perm`).
///
/// `shared_repo` uses the encoding produced by `parse_shared_repository_perm`: a positive value
/// is a set of permission bits to OR into `mode`; a negative value is an exact permission mask
/// that replaces the low nine bits of `mode`. Bits of `mode` above `0o777` are preserved.
///
/// The tweak itself is adjusted before it is applied, so bits already present in `mode` are
/// never altered in the additive case:
/// * if the owner cannot write, no write bits are added;
/// * if the owner can execute, the tweak's read bits are mirrored into execute bits.
fn calc_shared_perm(shared_repo: i32, mode: u32) -> u32 {
    // `unsigned_abs` cannot overflow, unlike negating `i32::MIN`.
    let mut tweak = shared_repo.unsigned_abs();

    if mode & 0o200 == 0 {
        tweak &= !0o222;
    }
    if mode & 0o100 != 0 {
        // Copy read bits to execute bits.
        tweak |= (tweak & 0o444) >> 2;
    }

    if shared_repo < 0 {
        (mode & !0o777) | tweak
    } else {
        mode | tweak
    }
}
93
94#[cfg(unix)]
95fn adjust_shared_perm_file(path: &Path, shared_repo: i32) -> io::Result<()> {
96    if shared_repo == 0 {
97        return Ok(());
98    }
99    use std::os::unix::fs::PermissionsExt;
100    let meta = fs::metadata(path)?;
101    let old = meta.permissions().mode();
102    let new_mode = calc_shared_perm(shared_repo, old);
103    if (old ^ new_mode) & 0o777 != 0 {
104        let mut p = meta.permissions();
105        p.set_mode(new_mode & 0o777);
106        fs::set_permissions(path, p)?;
107    }
108    Ok(())
109}
110
111#[cfg(not(unix))]
112fn adjust_shared_perm_file(_path: &Path, _shared_repo: i32) -> io::Result<()> {
113    Ok(())
114}
115
116/// Compare on-disk-relevant fields (Git `compare_ce_content` in `split-index.c`).
117pub(crate) fn entries_equal_for_split(a: &IndexEntry, b: &IndexEntry) -> bool {
118    let mask: u16 = 0xF000 | 0x8000;
119    let a_flags = a.flags & mask;
120    let b_flags = b.flags & mask;
121    let a_ext = a.flags_extended.unwrap_or(0) & 0xFFF;
122    let b_ext = b.flags_extended.unwrap_or(0) & 0xFFF;
123    a.ctime_sec == b.ctime_sec
124        && a.ctime_nsec == b.ctime_nsec
125        && a.mtime_sec == b.mtime_sec
126        && a.mtime_nsec == b.mtime_nsec
127        && a.dev == b.dev
128        && a.ino == b.ino
129        && a.mode == b.mode
130        && a.uid == b.uid
131        && a.gid == b.gid
132        && a.size == b.size
133        && a.oid == b.oid
134        && a_flags == b_flags
135        && a_ext == b_ext
136}
137
138fn replace_positions_in_order(link: &SplitIndexLink) -> Vec<usize> {
139    let Some(bm) = &link.replace_bitmap else {
140        return Vec::new();
141    };
142    if bm.bit_size == 0 {
143        return Vec::new();
144    }
145    let mut v = Vec::new();
146    bm.each_set_bit(|p| v.push(p));
147    v
148}
149
150fn bitmap_has_bit(bm: &EwahBitmap, i: usize) -> bool {
151    let mut found = false;
152    bm.each_set_bit(|pos| {
153        if pos == i {
154            found = true;
155        }
156    });
157    found
158}
159
160/// Merge split index + shared base into `index.entries` (Git `merge_base_index`).
161pub(crate) fn merge_split_into_index(
162    index: &mut Index,
163    link: SplitIndexLink,
164    base_entries: Vec<IndexEntry>,
165) -> Result<()> {
166    let saved = std::mem::take(&mut index.entries);
167    let replace_pos = replace_positions_in_order(&link);
168    let stubs: Vec<IndexEntry> = saved
169        .iter()
170        .filter(|e| e.path.is_empty())
171        .cloned()
172        .collect();
173    if stubs.len() != replace_pos.len() {
174        return Err(Error::IndexError(format!(
175            "split index: expected {} replacement stubs, found {}",
176            replace_pos.len(),
177            stubs.len()
178        )));
179    }
180    let mut stub_iter = stubs.into_iter();
181    let rest: Vec<IndexEntry> = saved.into_iter().filter(|e| !e.path.is_empty()).collect();
182
183    let delete = &link.delete_bitmap;
184    let replace = &link.replace_bitmap;
185
186    let mut merged: Vec<IndexEntry> = Vec::new();
187
188    for (i, mut base_e) in base_entries.into_iter().enumerate() {
189        if delete
190            .as_ref()
191            .is_some_and(|b| b.bit_size > 0 && bitmap_has_bit(b, i))
192        {
193            continue;
194        }
195        if replace
196            .as_ref()
197            .is_some_and(|b| b.bit_size > 0 && bitmap_has_bit(b, i))
198        {
199            let Some(rep) = stub_iter.next() else {
200                return Err(Error::IndexError(
201                    "split index: missing replacement entry".to_owned(),
202                ));
203            };
204            let mut e = rep;
205            e.path = base_e.path.clone();
206            e.base_index_pos = (i + 1) as u32;
207            merged.push(e);
208        } else {
209            base_e.base_index_pos = (i + 1) as u32;
210            merged.push(base_e);
211        }
212    }
213
214    if stub_iter.next().is_some() {
215        return Err(Error::IndexError(
216            "split index: too many replacement stubs".to_owned(),
217        ));
218    }
219
220    for mut e in rest {
221        e.base_index_pos = 0;
222        merged.push(e);
223    }
224
225    merged.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
226    index.entries = merged;
227    Ok(())
228}
229
230/// Parse the `link` extension payload (Git `read_link_extension`).
231pub(crate) fn parse_link_extension(data: &[u8]) -> Result<SplitIndexLink> {
232    if data.len() < 20 {
233        return Err(Error::IndexError(
234            "corrupt link extension (too short)".to_owned(),
235        ));
236    }
237    let base_oid = ObjectId::from_bytes(&data[..20])?;
238    let mut rest = &data[20..];
239    if rest.is_empty() {
240        return Ok(SplitIndexLink {
241            base_oid,
242            delete_bitmap: None,
243            replace_bitmap: None,
244        });
245    }
246    let Some((del, consumed)) = EwahBitmap::deserialize_prefix(rest) else {
247        return Err(Error::IndexError(
248            "corrupt delete bitmap in link extension".to_owned(),
249        ));
250    };
251    rest = &rest[consumed..];
252    let Some((rep, consumed2)) = EwahBitmap::deserialize_prefix(rest) else {
253        return Err(Error::IndexError(
254            "corrupt replace bitmap in link extension".to_owned(),
255        ));
256    };
257    rest = &rest[consumed2..];
258    if !rest.is_empty() {
259        return Err(Error::IndexError(
260            "garbage at the end of link extension".to_owned(),
261        ));
262    }
263    Ok(SplitIndexLink {
264        base_oid,
265        delete_bitmap: Some(del),
266        replace_bitmap: Some(rep),
267    })
268}
269
270/// Serialize `link` extension: base OID plus two EWAH bitmaps (Git always writes both after `prepare_to_write_split_index`).
271pub(crate) fn serialize_link_extension_payload(
272    base_oid: &ObjectId,
273    delete: &EwahBitmap,
274    replace: &EwahBitmap,
275) -> Vec<u8> {
276    let mut out = base_oid.as_bytes().to_vec();
277    delete.serialize(&mut out);
278    replace.serialize(&mut out);
279    out
280}
281
282/// Resolve path to shared index file (Git `read_index_from`), with fallbacks when `git_dir` does
283/// not match the repo that owns the index (nested trash repo + `GIT_INDEX_FILE`).
284fn resolve_shared_index_file(git_dir: &Path, index_path: &Path, base_oid: &ObjectId) -> PathBuf {
285    let name = format!("sharedindex.{}", base_oid.to_hex());
286    let primary = git_dir.join(&name);
287
288    let try_path = |p: PathBuf| -> Option<PathBuf> {
289        if p.is_file() {
290            Some(p)
291        } else {
292            None
293        }
294    };
295
296    if let Some(p) = try_path(primary.clone()) {
297        return p;
298    }
299    if let Some(parent) = index_path.parent() {
300        if let Some(p) = try_path(parent.join(&name)) {
301            return p;
302        }
303    }
304    if let Ok(cwd) = std::env::current_dir() {
305        let mut dir = cwd.as_path();
306        loop {
307            if let Some(p) = try_path(dir.join(".git").join(&name)) {
308                return p;
309            }
310            let Some(p) = dir.parent() else {
311                break;
312            };
313            dir = p;
314        }
315    }
316    if let Some(d) = index_path.parent() {
317        if let Ok(read) = fs::read_dir(d) {
318            for ent in read.flatten() {
319                let Ok(ft) = ent.file_type() else {
320                    continue;
321                };
322                if !ft.is_dir() {
323                    continue;
324                }
325                if let Some(p) = try_path(ent.path().join(".git").join(&name)) {
326                    return p;
327                }
328            }
329        }
330    }
331    primary
332}
333
334pub(crate) fn hash_index_body(body: &[u8]) -> ObjectId {
335    let mut hasher = Sha1::new();
336    hasher.update(body);
337    let digest = hasher.finalize();
338    ObjectId::from_bytes(&digest).expect("sha1 is 20 bytes")
339}
340
/// Tri-state result of reading `core.splitIndex`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum SplitIndexConfig {
    /// The setting holds a false value.
    Disabled,
    /// The setting is absent or holds a value that is not recognised as a boolean.
    Unset,
    /// The setting holds a true value.
    Enabled,
}
347
348pub(crate) fn split_index_config(cfg: &ConfigSet) -> SplitIndexConfig {
349    match cfg.get("core.splitIndex") {
350        None => SplitIndexConfig::Unset,
351        Some(v) => {
352            let t = v.trim();
353            if t.eq_ignore_ascii_case("false") || t == "0" {
354                SplitIndexConfig::Disabled
355            } else if t.eq_ignore_ascii_case("true") || t == "1" {
356                SplitIndexConfig::Enabled
357            } else {
358                SplitIndexConfig::Unset
359            }
360        }
361    }
362}
363
364pub(crate) fn max_percent_split_change(cfg: &ConfigSet) -> i32 {
365    match cfg.get("splitIndex.maxPercentChange") {
366        None => -1,
367        Some(v) => v.trim().parse::<i32>().unwrap_or(-1),
368    }
369}
370
/// Percentage of non-shared entries above which the shared index is rebuilt when
/// `splitIndex.maxPercentChange` is not configured.
fn default_max_percent() -> i32 {
    const DEFAULT_MAX_PERCENT_SPLIT_CHANGE: i32 = 20;
    DEFAULT_MAX_PERCENT_SPLIT_CHANGE
}
374
375pub(crate) fn should_rebuild_shared_index(index: &Index, cfg: &ConfigSet) -> bool {
376    let max_split = max_percent_split_change(cfg);
377    let max_split = match max_split {
378        -1 => default_max_percent(),
379        0 => return true,
380        100 => return false,
381        n => n,
382    };
383    let mut not_shared = 0u64;
384    for e in &index.entries {
385        if e.base_index_pos == 0 {
386            not_shared += 1;
387        }
388    }
389    let total = index.entries.len() as u64;
390    if total == 0 {
391        return false;
392    }
393    total * (max_split as u64) < not_shared * 100
394}
395
/// Whether the `GIT_TEST_SPLIT_INDEX` environment variable is set to a true value.
///
/// Recognises `1`, `true`, `yes` and `on` (ASCII case-insensitive, surrounding whitespace
/// ignored). An unset variable, a non-Unicode value, or any other text counts as false.
pub(crate) fn git_test_split_index_env() -> bool {
    match std::env::var("GIT_TEST_SPLIT_INDEX") {
        Ok(raw) => {
            let value = raw.trim();
            ["1", "true", "yes", "on"]
                .iter()
                .any(|truthy| value.eq_ignore_ascii_case(truthy))
        }
        Err(_) => false,
    }
}
405
406pub(crate) fn git_test_split_index_force_reorder(base_oid: &ObjectId) -> bool {
407    git_test_split_index_env() && (base_oid.as_bytes()[0] & 15) < 6
408}
409
410pub(crate) fn shared_index_expire_threshold(cfg: &ConfigSet) -> u64 {
411    let raw = cfg
412        .get("splitIndex.sharedIndexExpire")
413        .map(|s| s.trim().to_owned());
414    let spec = raw
415        .as_deref()
416        .filter(|s| !s.is_empty())
417        .unwrap_or("2.weeks.ago");
418    if spec.eq_ignore_ascii_case("never") {
419        return 0;
420    }
421    let mut err = 0;
422    approxidate_careful(spec, Some(&mut err))
423}
424
/// Whether the shared index file at `path` is old enough to be removed.
///
/// Returns `false` when `expiration` is 0 (expiry disabled) or the file cannot be stat-ed.
/// On Unix the file qualifies when its mtime is at or before `expiration`; on other platforms
/// this always returns `false`.
fn should_delete_shared_index(path: &Path, expiration: u64) -> bool {
    if expiration == 0 {
        return false;
    }
    let meta = match fs::metadata(path) {
        Ok(meta) => meta,
        Err(_) => return false,
    };

    #[cfg(unix)]
    let expired = {
        use std::os::unix::fs::MetadataExt;
        (meta.mtime() as u64) <= expiration
    };
    #[cfg(not(unix))]
    let expired = {
        let _ = meta;
        false
    };

    expired
}
443
444pub(crate) fn clean_stale_shared_index_files(git_dir: &Path, current_hex: &str, cfg: &ConfigSet) {
445    let expiration = shared_index_expire_threshold(cfg);
446    let Ok(read_dir) = fs::read_dir(git_dir) else {
447        return;
448    };
449    for ent in read_dir.flatten() {
450        let name = ent.file_name();
451        let Some(name) = name.to_str() else {
452            continue;
453        };
454        let Some(hex) = name.strip_prefix("sharedindex.") else {
455            continue;
456        };
457        if hex == current_hex {
458            continue;
459        }
460        let path = ent.path();
461        if should_delete_shared_index(&path, expiration) {
462            let _ = fs::remove_file(&path);
463        }
464    }
465}
466
467pub(crate) fn freshen_shared_index(path: &Path) {
468    let _ = filetime_set_to_now(path);
469}
470
471#[cfg(unix)]
472fn filetime_set_to_now(path: &Path) -> io::Result<()> {
473    use std::time::SystemTime;
474    let t = SystemTime::now();
475    let ft = filetime::FileTime::from_system_time(t);
476    filetime::set_file_mtime(path, ft)
477}
478
479#[cfg(not(unix))]
480fn filetime_set_to_now(_path: &Path) -> io::Result<()> {
481    Ok(())
482}
483
484/// Request from `update-index` for the next index write.
485#[derive(Debug, Clone, Copy, Default)]
486pub struct WriteSplitIndexRequest {
487    /// `Some(true)` / `Some(false)` for `--[no-]split-index`; `None` uses config / test env only.
488    pub explicit: Option<bool>,
489}
490
491impl WriteSplitIndexRequest {
492    /// Whether the next write should use split-index format.
493    ///
494    /// Matches Git: `--split-index` still enables split index when `core.splitIndex` is false,
495    /// but emits a warning (see `builtin/update-index.c`).
496    ///
497    /// When `explicit` is `None`, an index that was already split (`split_link` set after load)
498    /// stays split until `--no-split-index` (Git keeps `istate->split_index` across commands).
499    pub fn want_write_split(self, cfg: &ConfigSet, index: &Index) -> bool {
500        match self.explicit {
501            Some(false) => {
502                if matches!(split_index_config(cfg), SplitIndexConfig::Enabled) {
503                    eprintln!(
504                        "warning: core.splitIndex is set to true; remove or change it, if you really want to disable split index"
505                    );
506                }
507                false
508            }
509            Some(true) => {
510                if matches!(split_index_config(cfg), SplitIndexConfig::Disabled) {
511                    eprintln!(
512                        "warning: core.splitIndex is set to false; remove or change it, if you really want to enable split index"
513                    );
514                }
515                true
516            }
517            None => {
518                if matches!(split_index_config(cfg), SplitIndexConfig::Disabled) {
519                    return false;
520                }
521                index.split_link.is_some()
522                    || matches!(split_index_config(cfg), SplitIndexConfig::Enabled)
523                    || git_test_split_index_env()
524            }
525        }
526    }
527}
528
529fn find_entry_pos_sorted(entries: &[IndexEntry], path: &[u8], stage: u8) -> Option<usize> {
530    entries
531        .binary_search_by(|e| {
532            e.path
533                .as_slice()
534                .cmp(path)
535                .then_with(|| e.stage().cmp(&stage))
536        })
537        .ok()
538}
539
540fn load_shared_entries(
541    git_dir: &Path,
542    index_path: &Path,
543    base_oid: &ObjectId,
544) -> Result<Vec<IndexEntry>> {
545    let p = resolve_shared_index_file(git_dir, index_path, base_oid);
546    let data = fs::read(&p).map_err(Error::Io)?;
547    let mut shared = Index::parse(&data)?;
548    for (i, e) in shared.entries.iter_mut().enumerate() {
549        e.base_index_pos = (i + 1) as u32;
550    }
551    Ok(shared.entries)
552}
553
554/// Write split index to `path` under `git_dir`, updating `index` base positions and `split_link`.
555pub(crate) fn write_index_file_split(
556    path: &Path,
557    git_dir: &Path,
558    index: &mut Index,
559    cfg: &ConfigSet,
560    request: WriteSplitIndexRequest,
561    skip_hash: bool,
562) -> Result<()> {
563    let want_split = request.want_write_split(cfg, index);
564
565    let shared_repo = parse_shared_repository_perm(cfg.get("core.sharedRepository").as_deref());
566
567    if !want_split {
568        index.split_link = None;
569        for e in &mut index.entries {
570            e.base_index_pos = 0;
571        }
572        index.write_to_path(path, skip_hash)?;
573        adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
574        return Ok(());
575    }
576
577    // Git `alternate_index_output`: split index is only written to the repository's primary index
578    // file (`$GIT_DIR/index`). `GIT_INDEX_FILE` pointing elsewhere gets a unified index (t1700 #25).
579    let default_index = git_dir.join("index");
580    let is_primary_index = path == default_index
581        || path
582            .canonicalize()
583            .ok()
584            .zip(default_index.canonicalize().ok())
585            .is_some_and(|(a, b)| a == b);
586    if !is_primary_index {
587        index.split_link = None;
588        for e in &mut index.entries {
589            e.base_index_pos = 0;
590        }
591        index.write_to_path(path, skip_hash)?;
592        adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
593        return Ok(());
594    }
595
596    if index.sparse_directories {
597        return Err(Error::IndexError(
598            "cannot write split index for a sparse index".to_owned(),
599        ));
600    }
601
602    let prev_base = index
603        .split_link
604        .as_ref()
605        .map(|l| l.base_oid)
606        .unwrap_or(ObjectId::zero());
607
608    let mut rebuild = index.split_link.is_none()
609        || should_rebuild_shared_index(index, cfg)
610        || git_test_split_index_force_reorder(&prev_base);
611
612    if git_test_split_index_env() && index.split_link.is_none() {
613        rebuild = true;
614    }
615
616    let base_snapshot: Vec<IndexEntry> = if rebuild {
617        let mut v: Vec<IndexEntry> = index.entries.to_vec();
618        v.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
619        for (i, e) in v.iter_mut().enumerate() {
620            e.base_index_pos = (i + 1) as u32;
621        }
622        v
623    } else {
624        load_shared_entries(
625            git_dir,
626            path,
627            &index.split_link.as_ref().expect("split_link").base_oid,
628        )?
629    };
630
631    // After a shared-index rebuild, `base_snapshot` matches the merged index exactly; align indices
632    // (e.g. `--no-split-index` then `--split-index`). When reusing an on-disk shared file, do not
633    // remap by path — deleted paths can still exist in the shared index until expiry/rebuild, and
634    // re-adding the same path must stay split-only (`base_index_pos` 0) like Git.
635    if rebuild {
636        for e in &mut index.entries {
637            if let Some(i) = base_snapshot
638                .iter()
639                .position(|b| b.path == e.path && b.stage() == e.stage())
640            {
641                e.base_index_pos = (i + 1) as u32;
642            } else {
643                e.base_index_pos = 0;
644            }
645        }
646    }
647
648    let base_oid = if rebuild {
649        let shared_index = Index {
650            version: index.version,
651            entries: base_snapshot.clone(),
652            sparse_directories: false,
653            untracked_cache: None,
654            fsmonitor_last_update: None,
655            resolve_undo: None,
656            split_link: None,
657            cache_tree_root: None,
658        };
659        let tmp = match tempfile::NamedTempFile::new_in(git_dir) {
660            Ok(t) => t,
661            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
662                // Git: mks_tempfile_sm failure falls back to a unified index (no `link` extension).
663                index.split_link = None;
664                for e in &mut index.entries {
665                    e.base_index_pos = 0;
666                }
667                index.write_to_path(path, skip_hash)?;
668                adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
669                return Ok(());
670            }
671            Err(e) => return Err(Error::Io(e)),
672        };
673        let tmp_path = tmp.path().to_path_buf();
674        shared_index.write_to_path(&tmp_path, skip_hash)?;
675        adjust_shared_perm_file(&tmp_path, shared_repo).map_err(Error::Io)?;
676        let file_data = fs::read(&tmp_path).map_err(Error::Io)?;
677        if file_data.len() < 20 {
678            return Err(Error::IndexError("shared index temp too short".to_owned()));
679        }
680        let body = &file_data[..file_data.len() - 20];
681        let oid = hash_index_body(body);
682        let dest = git_dir.join(format!("sharedindex.{}", oid.to_hex()));
683        if let Err(e) = fs::rename(&tmp_path, &dest) {
684            if e.kind() == io::ErrorKind::PermissionDenied {
685                let _ = fs::remove_file(&tmp_path);
686                index.split_link = None;
687                for ent in &mut index.entries {
688                    ent.base_index_pos = 0;
689                }
690                index.write_to_path(path, skip_hash)?;
691                adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
692                return Ok(());
693            }
694            return Err(Error::Io(e));
695        }
696        clean_stale_shared_index_files(git_dir, &oid.to_hex(), cfg);
697        oid
698    } else {
699        let oid = index.split_link.as_ref().unwrap().base_oid;
700        freshen_shared_index(&resolve_shared_index_file(git_dir, path, &oid));
701        oid
702    };
703
704    // Map each shared-index row to the merged entry that claims it (`ce->index`), like Git
705    // `prepare_to_write_split_index` (path must still match that row).
706    let mut merged_by_pos: Vec<Option<usize>> = vec![None; base_snapshot.len()];
707    for (p, e) in index.entries.iter().enumerate() {
708        if e.base_index_pos == 0 {
709            continue;
710        }
711        let i = e.base_index_pos.saturating_sub(1) as usize;
712        if i < base_snapshot.len()
713            && base_snapshot[i].path == e.path
714            && base_snapshot[i].stage() == e.stage()
715        {
716            merged_by_pos[i] = Some(p);
717        }
718    }
719
720    let mut delete_bm = EwahBitmap::new();
721    let mut replace_bm = EwahBitmap::new();
722    let mut main_entries: Vec<IndexEntry> = Vec::new();
723
724    for i in 0..base_snapshot.len() {
725        let b = &base_snapshot[i];
726        if let Some(p) = merged_by_pos[i] {
727            let c = &index.entries[p];
728            if entries_equal_for_split(b, c) {
729                continue;
730            }
731            replace_bm.set_bit_extend(i);
732            let mut stub = c.clone();
733            stub.path.clear();
734            stub.base_index_pos = 0;
735            main_entries.push(stub);
736        } else {
737            delete_bm.set_bit_extend(i);
738        }
739    }
740
741    for e in &index.entries {
742        if e.base_index_pos == 0 {
743            let mut c = e.clone();
744            c.base_index_pos = 0;
745            main_entries.push(c);
746            continue;
747        }
748        let i = e.base_index_pos.saturating_sub(1) as usize;
749        if i >= base_snapshot.len()
750            || base_snapshot[i].path != e.path
751            || base_snapshot[i].stage() != e.stage()
752        {
753            let mut c = e.clone();
754            c.base_index_pos = 0;
755            main_entries.push(c);
756            continue;
757        }
758        if entries_equal_for_split(&base_snapshot[i], e) {
759            continue;
760        }
761        // Replacement: stub already pushed above.
762    }
763
764    main_entries.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
765
766    let link = SplitIndexLink {
767        base_oid,
768        delete_bitmap: Some(delete_bm),
769        replace_bitmap: Some(replace_bm),
770    };
771
772    let out_index = Index {
773        version: index.version,
774        entries: main_entries,
775        sparse_directories: false,
776        untracked_cache: index.untracked_cache.clone(),
777        fsmonitor_last_update: index.fsmonitor_last_update.clone(),
778        resolve_undo: None,
779        split_link: Some(link),
780        cache_tree_root: index.cache_tree_root,
781    };
782
783    out_index.write_to_path(path, skip_hash)?;
784    adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
785
786    for e in &mut index.entries {
787        if let Some(pos) = find_entry_pos_sorted(&base_snapshot, &e.path, e.stage()) {
788            if entries_equal_for_split(&base_snapshot[pos], e) {
789                e.base_index_pos = (pos + 1) as u32;
790                continue;
791            }
792        }
793        e.base_index_pos = 0;
794    }
795
796    index.split_link = Some(SplitIndexLink {
797        base_oid,
798        delete_bitmap: None,
799        replace_bitmap: None,
800    });
801
802    Ok(())
803}
804
805/// Human-readable split-index dump for `test-tool dump-split-index`.
806/// If `index` has a split `link` extension, load the shared index and merge entries.
807pub fn resolve_split_index_if_needed(
808    index: &mut Index,
809    git_dir: &Path,
810    index_path: &Path,
811) -> Result<()> {
812    let Some(link) = index.split_link.clone() else {
813        return Ok(());
814    };
815    if link.base_oid.is_zero() {
816        return Ok(());
817    }
818    let base_oid = link.base_oid;
819    let shared_path = resolve_shared_index_file(git_dir, index_path, &base_oid);
820    let data = fs::read(&shared_path).map_err(|e| {
821        Error::IndexError(format!(
822            "split index: cannot read shared index {}: {e}",
823            shared_path.display()
824        ))
825    })?;
826    if data.len() < 20 {
827        return Err(Error::IndexError(
828            "split index: shared index too short".to_owned(),
829        ));
830    }
831    let body = &data[..data.len() - 20];
832    let got = hash_index_body(body);
833    if got != base_oid {
834        return Err(Error::IndexError(format!(
835            "broken index, expect {} in {}, got {}",
836            base_oid.to_hex(),
837            shared_path.display(),
838            got.to_hex()
839        )));
840    }
841    freshen_shared_index(&shared_path);
842    let base_entries = Index::parse(&data)?.entries;
843    merge_split_into_index(index, link, base_entries)?;
844    index.split_link = Some(SplitIndexLink {
845        base_oid,
846        delete_bitmap: None,
847        replace_bitmap: None,
848    });
849    Ok(())
850}
851
852/// Format output for `test-tool dump-split-index` (Git reads the index with `do_read_index` only,
853/// without merging the shared base — stubs and EWAH bitmaps stay intact).
854pub fn format_dump_split_index_file(data: &[u8], index: &Index) -> Result<String> {
855    use std::fmt::Write;
856    if data.len() < 20 {
857        return Err(Error::IndexError("index too short".to_owned()));
858    }
859    let body = &data[..data.len() - 20];
860    let trail = &data[data.len() - 20..];
861    let own = if trail.iter().all(|&b| b == 0) {
862        hash_index_body(body)
863    } else {
864        ObjectId::from_bytes(trail)?
865    };
866
867    let mut s = String::new();
868    writeln!(s, "own {}", own.to_hex()).map_err(|e| Error::IndexError(e.to_string()))?;
869    let Some(link) = &index.split_link else {
870        writeln!(s, "not a split index").map_err(|e| Error::IndexError(e.to_string()))?;
871        return Ok(s);
872    };
873    writeln!(s, "base {}", link.base_oid.to_hex()).map_err(|e| Error::IndexError(e.to_string()))?;
874    for e in &index.entries {
875        // Split-index replacement stubs use `CE_STRIP_NAME`: zero-length path on disk (Git still prints the line).
876        let path_disp = String::from_utf8_lossy(&e.path);
877        writeln!(
878            s,
879            "{:06o} {} {}\t{}",
880            e.mode,
881            e.oid.to_hex(),
882            e.stage(),
883            path_disp
884        )
885        .map_err(|e| Error::IndexError(e.to_string()))?;
886    }
887    write!(s, "replacements:").map_err(|e| Error::IndexError(e.to_string()))?;
888    if let Some(bm) = &link.replace_bitmap {
889        bm.each_set_bit(|pos| {
890            write!(s, " {}", pos).ok();
891        });
892    }
893    writeln!(s).map_err(|e| Error::IndexError(e.to_string()))?;
894    write!(s, "deletions:").map_err(|e| Error::IndexError(e.to_string()))?;
895    if let Some(bm) = &link.delete_bitmap {
896        bm.each_set_bit(|pos| {
897            write!(s, " {}", pos).ok();
898        });
899    }
900    writeln!(s).map_err(|e| Error::IndexError(e.to_string()))?;
901    Ok(s)
902}