Skip to main content

grit_lib/
split_index.rs

1//! Split index: `link` extension and `sharedindex.<sha1>` (Git `split-index.c`).
2
3use std::fs;
4use std::io;
5use std::path::{Path, PathBuf};
6
7use sha1::{Digest, Sha1};
8use sha2::Sha256;
9
10use crate::config::ConfigSet;
11use crate::error::{Error, Result};
12use crate::ewah_bitmap::EwahBitmap;
13use crate::git_date::approx::approxidate_careful;
14use crate::index::{Index, IndexEntry};
15use crate::objects::{HashAlgo, ObjectId};
16
17/// Split-index metadata carried on an [`Index`] (in-memory; bitmaps cleared after merge/write).
18#[derive(Debug, Clone)]
19pub(crate) struct SplitIndexLink {
20    /// OID of the shared index file (`sharedindex.<hex>`).
21    pub base_oid: ObjectId,
22    pub delete_bitmap: Option<EwahBitmap>,
23    pub replace_bitmap: Option<EwahBitmap>,
24}
25
/// Interpret a `core.sharedRepository` value (Git `git_config_perm`).
///
/// Returns one of:
/// * `0` — leave permissions to the umask (unset, empty, `umask`, `false`, or unrecognised);
/// * `0o660` / `0o664` — bits to OR into the file mode (`group`/`true`/`yes`/`on`/`1`, or
///   `all`/`world`/`everybody`/`2`);
/// * a negative number `-(mode & 0o666)` — an explicit octal mode that replaces the permission
///   bits. Octal modes that do not give the owner read+write are treated as `0`.
///
/// All keywords are matched case-insensitively after trimming surrounding whitespace.
fn parse_shared_repository_perm(raw: Option<&str>) -> i32 {
    const PERM_UMASK: i32 = 0;
    const OLD_PERM_GROUP: i32 = 1;
    const OLD_PERM_EVERYBODY: i32 = 2;
    const PERM_GROUP: i32 = 0o660;
    const PERM_EVERYBODY: i32 = 0o664;

    let Some(value) = raw.map(str::trim).filter(|s| !s.is_empty()) else {
        return PERM_UMASK;
    };
    let is = |word: &str| value.eq_ignore_ascii_case(word);

    if is("umask") {
        return PERM_UMASK;
    }
    if is("group") {
        return PERM_GROUP;
    }
    if is("all") || is("world") || is("everybody") {
        return PERM_EVERYBODY;
    }

    // Pure octal digits: 0/1/2 are the legacy spellings, anything else is a file mode.
    if value.bytes().all(|b| (b'0'..=b'7').contains(&b)) {
        if let Ok(mode) = i32::from_str_radix(value, 8) {
            return match mode {
                PERM_UMASK => PERM_UMASK,
                OLD_PERM_GROUP => PERM_GROUP,
                OLD_PERM_EVERYBODY => PERM_EVERYBODY,
                // The owner must always keep read and write permission.
                m if m & 0o600 != 0o600 => PERM_UMASK,
                // Others can never be granted more than read/write; execute bits are derived
                // from the file itself when the mode is applied.
                m => -(m & 0o666),
            };
        }
    }

    // Not a keyword and not octal: fall back to Git's boolean spellings.
    if is("true") || is("yes") || is("on") {
        PERM_GROUP
    } else {
        PERM_UMASK
    }
}
71
/// Compute the mode a file should have under `core.sharedRepository` (Git `calc_shared_perm`).
///
/// `shared_repo` is the value produced by `parse_shared_repository_perm`: a positive value is
/// OR-ed into `mode`, a negative value replaces the low nine permission bits with its absolute
/// value. Before being applied, the tweak itself is adjusted to the file:
/// * if the owner cannot write the file, no write bits are added;
/// * if the owner can execute the file, every read bit being granted also grants execute.
///
/// Only the tweak is adjusted, so permission bits already present on the file are never removed
/// in the additive (positive) case.
fn calc_shared_perm(shared_repo: i32, mode: u32) -> u32 {
    // `unsigned_abs` also covers `i32::MIN`, where plain negation would overflow.
    let mut tweak = shared_repo.unsigned_abs();

    if mode & 0o200 == 0 {
        tweak &= !0o222;
    }
    if mode & 0o100 != 0 {
        // Copy read bits to execute bits.
        tweak |= (tweak & 0o444) >> 2;
    }

    if shared_repo < 0 {
        (mode & !0o777) | tweak
    } else {
        mode | tweak
    }
}
94
95#[cfg(unix)]
96fn adjust_shared_perm_file(path: &Path, shared_repo: i32) -> io::Result<()> {
97    if shared_repo == 0 {
98        return Ok(());
99    }
100    use std::os::unix::fs::PermissionsExt;
101    let meta = fs::metadata(path)?;
102    let old = meta.permissions().mode();
103    let new_mode = calc_shared_perm(shared_repo, old);
104    if (old ^ new_mode) & 0o777 != 0 {
105        let mut p = meta.permissions();
106        p.set_mode(new_mode & 0o777);
107        fs::set_permissions(path, p)?;
108    }
109    Ok(())
110}
111
/// Non-Unix counterpart of `adjust_shared_perm_file`: there are no Unix mode bits to adjust,
/// so this always succeeds without touching the file.
#[cfg(not(unix))]
fn adjust_shared_perm_file(_path: &Path, _shared_repo: i32) -> io::Result<()> {
    Ok(())
}
116
117/// Compare on-disk-relevant fields (Git `compare_ce_content` in `split-index.c`).
118pub(crate) fn entries_equal_for_split(a: &IndexEntry, b: &IndexEntry) -> bool {
119    let mask: u16 = 0xF000 | 0x8000;
120    let a_flags = a.flags & mask;
121    let b_flags = b.flags & mask;
122    let ext_mask: u16 = 0x7000;
123    let a_ext = a.flags_extended.unwrap_or(0) & ext_mask;
124    let b_ext = b.flags_extended.unwrap_or(0) & ext_mask;
125    a.ctime_sec == b.ctime_sec
126        && a.ctime_nsec == b.ctime_nsec
127        && a.mtime_sec == b.mtime_sec
128        && a.mtime_nsec == b.mtime_nsec
129        && a.dev == b.dev
130        && a.ino == b.ino
131        && a.mode == b.mode
132        && a.uid == b.uid
133        && a.gid == b.gid
134        && a.size == b.size
135        && a.oid == b.oid
136        && a_flags == b_flags
137        && a_ext == b_ext
138}
139
140fn replace_positions_in_order(link: &SplitIndexLink) -> Vec<usize> {
141    let Some(bm) = &link.replace_bitmap else {
142        return Vec::new();
143    };
144    if bm.bit_size == 0 {
145        return Vec::new();
146    }
147    let mut v = Vec::new();
148    bm.each_set_bit(|p| v.push(p));
149    v
150}
151
152fn bitmap_has_bit(bm: &EwahBitmap, i: usize) -> bool {
153    let mut found = false;
154    bm.each_set_bit(|pos| {
155        if pos == i {
156            found = true;
157        }
158    });
159    found
160}
161
162/// Merge split index + shared base into `index.entries` (Git `merge_base_index`).
163pub(crate) fn merge_split_into_index(
164    index: &mut Index,
165    link: SplitIndexLink,
166    base_entries: Vec<IndexEntry>,
167) -> Result<()> {
168    let saved = std::mem::take(&mut index.entries);
169    let replace_pos = replace_positions_in_order(&link);
170    let stubs: Vec<IndexEntry> = saved
171        .iter()
172        .filter(|e| e.path.is_empty())
173        .cloned()
174        .collect();
175    if stubs.len() != replace_pos.len() {
176        return Err(Error::IndexError(format!(
177            "split index: expected {} replacement stubs, found {}",
178            replace_pos.len(),
179            stubs.len()
180        )));
181    }
182    let mut stub_iter = stubs.into_iter();
183    let rest: Vec<IndexEntry> = saved.into_iter().filter(|e| !e.path.is_empty()).collect();
184
185    let delete = &link.delete_bitmap;
186    let replace = &link.replace_bitmap;
187
188    let mut merged: Vec<IndexEntry> = Vec::new();
189
190    for (i, mut base_e) in base_entries.into_iter().enumerate() {
191        if delete
192            .as_ref()
193            .is_some_and(|b| b.bit_size > 0 && bitmap_has_bit(b, i))
194        {
195            continue;
196        }
197        if replace
198            .as_ref()
199            .is_some_and(|b| b.bit_size > 0 && bitmap_has_bit(b, i))
200        {
201            let Some(rep) = stub_iter.next() else {
202                return Err(Error::IndexError(
203                    "split index: missing replacement entry".to_owned(),
204                ));
205            };
206            let mut e = rep;
207            e.path = base_e.path.clone();
208            e.base_index_pos = (i + 1) as u32;
209            merged.push(e);
210        } else {
211            base_e.base_index_pos = (i + 1) as u32;
212            merged.push(base_e);
213        }
214    }
215
216    if stub_iter.next().is_some() {
217        return Err(Error::IndexError(
218            "split index: too many replacement stubs".to_owned(),
219        ));
220    }
221
222    for mut e in rest {
223        e.base_index_pos = 0;
224        merged.push(e);
225    }
226
227    merged.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
228    index.entries = merged;
229    Ok(())
230}
231
232/// Parse the `link` extension payload (Git `read_link_extension`).
233pub(crate) fn parse_link_extension(data: &[u8], algo: HashAlgo) -> Result<SplitIndexLink> {
234    let hash_len = algo.len();
235    if data.len() < hash_len {
236        return Err(Error::IndexError(
237            "corrupt link extension (too short)".to_owned(),
238        ));
239    }
240    let base_oid = ObjectId::from_bytes(&data[..hash_len])?;
241    let mut rest = &data[hash_len..];
242    if rest.is_empty() {
243        return Ok(SplitIndexLink {
244            base_oid,
245            delete_bitmap: None,
246            replace_bitmap: None,
247        });
248    }
249    let Some((del, consumed)) = EwahBitmap::deserialize_prefix(rest) else {
250        return Err(Error::IndexError(
251            "corrupt delete bitmap in link extension".to_owned(),
252        ));
253    };
254    rest = &rest[consumed..];
255    let Some((rep, consumed2)) = EwahBitmap::deserialize_prefix(rest) else {
256        return Err(Error::IndexError(
257            "corrupt replace bitmap in link extension".to_owned(),
258        ));
259    };
260    rest = &rest[consumed2..];
261    if !rest.is_empty() {
262        return Err(Error::IndexError(
263            "garbage at the end of link extension".to_owned(),
264        ));
265    }
266    Ok(SplitIndexLink {
267        base_oid,
268        delete_bitmap: Some(del),
269        replace_bitmap: Some(rep),
270    })
271}
272
273/// Serialize `link` extension: base OID plus two EWAH bitmaps (Git always writes both after `prepare_to_write_split_index`).
274pub(crate) fn serialize_link_extension_payload(
275    base_oid: &ObjectId,
276    delete: &EwahBitmap,
277    replace: &EwahBitmap,
278) -> Vec<u8> {
279    let mut out = base_oid.as_bytes().to_vec();
280    delete.serialize(&mut out);
281    replace.serialize(&mut out);
282    out
283}
284
285/// Resolve path to shared index file (Git `read_index_from`), with fallbacks when `git_dir` does
286/// not match the repo that owns the index (nested trash repo + `GIT_INDEX_FILE`).
287fn resolve_shared_index_file(git_dir: &Path, index_path: &Path, base_oid: &ObjectId) -> PathBuf {
288    let name = format!("sharedindex.{}", base_oid.to_hex());
289    let primary = git_dir.join(&name);
290
291    let try_path = |p: PathBuf| -> Option<PathBuf> {
292        if p.is_file() {
293            Some(p)
294        } else {
295            None
296        }
297    };
298
299    if let Some(p) = try_path(primary.clone()) {
300        return p;
301    }
302    if let Some(parent) = index_path.parent() {
303        if let Some(p) = try_path(parent.join(&name)) {
304            return p;
305        }
306    }
307    if let Ok(cwd) = std::env::current_dir() {
308        let mut dir = cwd.as_path();
309        loop {
310            if let Some(p) = try_path(dir.join(".git").join(&name)) {
311                return p;
312            }
313            let Some(p) = dir.parent() else {
314                break;
315            };
316            dir = p;
317        }
318    }
319    if let Some(d) = index_path.parent() {
320        if let Ok(read) = fs::read_dir(d) {
321            for ent in read.flatten() {
322                let Ok(ft) = ent.file_type() else {
323                    continue;
324                };
325                if !ft.is_dir() {
326                    continue;
327                }
328                if let Some(p) = try_path(ent.path().join(".git").join(&name)) {
329                    return p;
330                }
331            }
332        }
333    }
334    primary
335}
336
337pub(crate) fn hash_index_body(body: &[u8], algo: HashAlgo) -> ObjectId {
338    let digest: Vec<u8> = match algo {
339        HashAlgo::Sha1 => {
340            let mut hasher = Sha1::new();
341            hasher.update(body);
342            hasher.finalize().to_vec()
343        }
344        HashAlgo::Sha256 => {
345            let mut hasher = Sha256::new();
346            hasher.update(body);
347            hasher.finalize().to_vec()
348        }
349    };
350    ObjectId::from_bytes(&digest).unwrap_or_else(|_| unreachable!("digest is a valid OID width"))
351}
352
/// Tri-state reading of `core.splitIndex`, as produced by `split_index_config`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum SplitIndexConfig {
    /// The setting is present and false.
    Disabled,
    /// The setting is absent or its value is not recognised as a boolean.
    Unset,
    /// The setting is present and true.
    Enabled,
}
359
360pub(crate) fn split_index_config(cfg: &ConfigSet) -> SplitIndexConfig {
361    match cfg.get("core.splitIndex") {
362        None => SplitIndexConfig::Unset,
363        Some(v) => {
364            let t = v.trim();
365            if t.eq_ignore_ascii_case("false") || t == "0" {
366                SplitIndexConfig::Disabled
367            } else if t.eq_ignore_ascii_case("true") || t == "1" {
368                SplitIndexConfig::Enabled
369            } else {
370                SplitIndexConfig::Unset
371            }
372        }
373    }
374}
375
376pub(crate) fn max_percent_split_change(cfg: &ConfigSet) -> i32 {
377    match cfg.get("splitIndex.maxPercentChange") {
378        None => -1,
379        Some(v) => v.trim().parse::<i32>().unwrap_or(-1),
380    }
381}
382
/// Percentage of non-shared entries above which the shared index is rebuilt when
/// `splitIndex.maxPercentChange` is not (validly) configured.
fn default_max_percent() -> i32 {
    const DEFAULT_MAX_PERCENT_SPLIT_CHANGE: i32 = 20;
    DEFAULT_MAX_PERCENT_SPLIT_CHANGE
}
386
387pub(crate) fn should_rebuild_shared_index(index: &Index, cfg: &ConfigSet) -> bool {
388    let max_split = max_percent_split_change(cfg);
389    let max_split = match max_split {
390        -1 => default_max_percent(),
391        0 => return true,
392        100 => return false,
393        n => n,
394    };
395    let mut not_shared = 0u64;
396    for e in &index.entries {
397        if e.base_index_pos == 0 {
398            not_shared += 1;
399        }
400    }
401    let total = index.entries.len() as u64;
402    if total == 0 {
403        return false;
404    }
405    total * (max_split as u64) < not_shared * 100
406}
407
/// Whether the `GIT_TEST_SPLIT_INDEX` environment variable asks for split-index mode.
///
/// True only when the variable is set to `1`, `true` or `yes` (surrounding whitespace ignored,
/// words matched case-insensitively); unset, unreadable or any other value is false.
pub(crate) fn git_test_split_index_env() -> bool {
    let Ok(raw) = std::env::var("GIT_TEST_SPLIT_INDEX") else {
        return false;
    };
    let value = raw.trim();
    value == "1"
        || ["true", "yes"]
            .iter()
            .any(|word| value.eq_ignore_ascii_case(word))
}
417
/// Whether the cache-tree should be verified before an index write.
///
/// Upstream's `write_locked_index` gates this on `git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)`,
/// but the upstream test harness (`test-lib.sh`) exports the variable as `true` by default, so
/// in practice the check is on unless a test turns it off. Grit mirrors that effective default:
/// the result is `true` when the variable is unset (or unreadable) and `false` only when it is
/// explicitly falsy — empty, `0`, `false` or `no` after trimming, case-insensitively.
///
/// Verification only ever rejects a genuinely corrupt cache-tree (e.g. one primed from a tree
/// with duplicate path entries — `t4058-diff-duplicates`); well-formed trees verify cleanly.
pub(crate) fn git_test_check_cache_tree() -> bool {
    let Ok(raw) = std::env::var("GIT_TEST_CHECK_CACHE_TREE") else {
        return true;
    };
    let value = raw.trim();
    let explicitly_off = value.is_empty()
        || value == "0"
        || ["false", "no"]
            .iter()
            .any(|word| value.eq_ignore_ascii_case(word));
    !explicitly_off
}
439
440pub(crate) fn git_test_split_index_force_reorder(base_oid: &ObjectId) -> bool {
441    git_test_split_index_env() && (base_oid.as_bytes()[0] & 15) < 6
442}
443
444pub(crate) fn shared_index_expire_threshold(cfg: &ConfigSet) -> u64 {
445    let raw = cfg
446        .get("splitIndex.sharedIndexExpire")
447        .map(|s| s.trim().to_owned());
448    let spec = raw
449        .as_deref()
450        .filter(|s| !s.is_empty())
451        .unwrap_or("2.weeks.ago");
452    if spec.eq_ignore_ascii_case("never") {
453        return 0;
454    }
455    let mut err = 0;
456    approxidate_careful(spec, Some(&mut err))
457}
458
/// Whether the shared index file at `path` has expired.
///
/// `expiration == 0` means "never expire". A file that cannot be stat-ed is kept. On Unix the
/// file is expired when its mtime is at or before `expiration`; on other platforms nothing is
/// ever reported as expired.
fn should_delete_shared_index(path: &Path, expiration: u64) -> bool {
    if expiration == 0 {
        return false;
    }
    match fs::metadata(path) {
        Err(_) => false,
        Ok(meta) => {
            #[cfg(unix)]
            {
                use std::os::unix::fs::MetadataExt;
                (meta.mtime() as u64) <= expiration
            }
            #[cfg(not(unix))]
            {
                let _ = meta;
                false
            }
        }
    }
}
477
478pub(crate) fn clean_stale_shared_index_files(git_dir: &Path, current_hex: &str, cfg: &ConfigSet) {
479    let expiration = shared_index_expire_threshold(cfg);
480    let Ok(read_dir) = fs::read_dir(git_dir) else {
481        return;
482    };
483    for ent in read_dir.flatten() {
484        let name = ent.file_name();
485        let Some(name) = name.to_str() else {
486            continue;
487        };
488        let Some(hex) = name.strip_prefix("sharedindex.") else {
489            continue;
490        };
491        if hex == current_hex {
492            continue;
493        }
494        let path = ent.path();
495        if should_delete_shared_index(&path, expiration) {
496            let _ = fs::remove_file(&path);
497        }
498    }
499}
500
501pub(crate) fn freshen_shared_index(path: &Path) {
502    let _ = filetime_set_to_now(path);
503}
504
505#[cfg(unix)]
506fn filetime_set_to_now(path: &Path) -> io::Result<()> {
507    use std::time::SystemTime;
508    let t = SystemTime::now();
509    let ft = filetime::FileTime::from_system_time(t);
510    filetime::set_file_mtime(path, ft)
511}
512
/// Non-Unix build: the timestamp is left untouched and success is reported.
#[cfg(not(unix))]
fn filetime_set_to_now(_path: &Path) -> io::Result<()> {
    Ok(())
}
517
/// What `update-index` asked for regarding the format of the next index write.
#[derive(Debug, Clone, Copy, Default)]
pub struct WriteSplitIndexRequest {
    /// `Some(true)` for `--split-index`, `Some(false)` for `--no-split-index`; `None` when
    /// neither was given, in which case configuration and the test environment decide.
    pub explicit: Option<bool>,
}
524
525impl WriteSplitIndexRequest {
526    /// Whether the next write should use split-index format.
527    ///
528    /// Matches Git: `--split-index` still enables split index when `core.splitIndex` is false,
529    /// but emits a warning (see `builtin/update-index.c`).
530    ///
531    /// When `explicit` is `None`, an index that was already split (`split_link` set after load)
532    /// stays split until `--no-split-index` (Git keeps `istate->split_index` across commands).
533    pub fn want_write_split(self, cfg: &ConfigSet, index: &Index) -> bool {
534        match self.explicit {
535            Some(false) => {
536                if matches!(split_index_config(cfg), SplitIndexConfig::Enabled) {
537                    eprintln!(
538                        "warning: core.splitIndex is set to true; remove or change it, if you really want to disable split index"
539                    );
540                }
541                false
542            }
543            Some(true) => {
544                if matches!(split_index_config(cfg), SplitIndexConfig::Disabled) {
545                    eprintln!(
546                        "warning: core.splitIndex is set to false; remove or change it, if you really want to enable split index"
547                    );
548                }
549                true
550            }
551            None => {
552                if matches!(split_index_config(cfg), SplitIndexConfig::Disabled) {
553                    return false;
554                }
555                index.split_link.is_some()
556                    || matches!(split_index_config(cfg), SplitIndexConfig::Enabled)
557                    || git_test_split_index_env()
558            }
559        }
560    }
561}
562
563fn find_entry_pos_sorted(entries: &[IndexEntry], path: &[u8], stage: u8) -> Option<usize> {
564    entries
565        .binary_search_by(|e| {
566            e.path
567                .as_slice()
568                .cmp(path)
569                .then_with(|| e.stage().cmp(&stage))
570        })
571        .ok()
572}
573
574fn load_shared_entries(
575    git_dir: &Path,
576    index_path: &Path,
577    base_oid: &ObjectId,
578) -> Result<Vec<IndexEntry>> {
579    let p = resolve_shared_index_file(git_dir, index_path, base_oid);
580    let data = fs::read(&p).map_err(Error::Io)?;
581    let mut shared = Index::parse(&data)?;
582    for (i, e) in shared.entries.iter_mut().enumerate() {
583        e.base_index_pos = (i + 1) as u32;
584    }
585    Ok(shared.entries)
586}
587
588/// Write split index to `path` under `git_dir`, updating `index` base positions and `split_link`.
589pub(crate) fn write_index_file_split(
590    path: &Path,
591    git_dir: &Path,
592    index: &mut Index,
593    cfg: &ConfigSet,
594    request: WriteSplitIndexRequest,
595    skip_hash: bool,
596) -> Result<()> {
597    // Mirror upstream `write_locked_index`: under GIT_TEST_CHECK_CACHE_TREE, verify the cache-tree
598    // against the index before persisting. A duplicate-entry tree (t4058) produces a cache-tree
599    // whose entry counts exceed the deduplicated index, which must abort the write with the
600    // canonical "corrupted cache-tree" error rather than silently writing a broken index.
601    if git_test_check_cache_tree() {
602        crate::write_tree::verify_cache_tree(index)?;
603    }
604
605    let want_split = request.want_write_split(cfg, index);
606
607    let shared_repo = parse_shared_repository_perm(cfg.get("core.sharedRepository").as_deref());
608
609    if !want_split {
610        index.split_link = None;
611        for e in &mut index.entries {
612            e.base_index_pos = 0;
613        }
614        index.write_to_path(path, skip_hash)?;
615        adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
616        return Ok(());
617    }
618
619    // Git `alternate_index_output`: split index is only written to the repository's primary index
620    // file (`$GIT_DIR/index`). `GIT_INDEX_FILE` pointing elsewhere gets a unified index (t1700 #25).
621    let default_index = git_dir.join("index");
622    let is_primary_index = path == default_index
623        || path
624            .canonicalize()
625            .ok()
626            .zip(default_index.canonicalize().ok())
627            .is_some_and(|(a, b)| a == b);
628    if !is_primary_index {
629        index.split_link = None;
630        for e in &mut index.entries {
631            e.base_index_pos = 0;
632        }
633        index.write_to_path(path, skip_hash)?;
634        adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
635        return Ok(());
636    }
637
638    if index.sparse_directories {
639        return Err(Error::IndexError(
640            "cannot write split index for a sparse index".to_owned(),
641        ));
642    }
643
644    let prev_base = index
645        .split_link
646        .as_ref()
647        .map(|l| l.base_oid)
648        .unwrap_or(ObjectId::zero());
649
650    let mut rebuild = index.split_link.is_none()
651        || should_rebuild_shared_index(index, cfg)
652        || git_test_split_index_force_reorder(&prev_base);
653
654    if git_test_split_index_env() && index.split_link.is_none() {
655        rebuild = true;
656    }
657
658    let base_snapshot: Vec<IndexEntry> = if rebuild {
659        let mut v: Vec<IndexEntry> = index.entries.to_vec();
660        v.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
661        for (i, e) in v.iter_mut().enumerate() {
662            e.base_index_pos = (i + 1) as u32;
663        }
664        v
665    } else {
666        let link = index.split_link.as_ref().ok_or_else(|| {
667            Error::IndexError("split index missing base link during reuse".to_owned())
668        })?;
669        load_shared_entries(git_dir, path, &link.base_oid)?
670    };
671
672    // After a shared-index rebuild, `base_snapshot` matches the merged index exactly; align indices
673    // (e.g. `--no-split-index` then `--split-index`). When reusing an on-disk shared file, do not
674    // remap by path — deleted paths can still exist in the shared index until expiry/rebuild, and
675    // re-adding the same path must stay split-only (`base_index_pos` 0) like Git.
676    if rebuild {
677        for e in &mut index.entries {
678            if let Some(i) = base_snapshot
679                .iter()
680                .position(|b| b.path == e.path && b.stage() == e.stage())
681            {
682                e.base_index_pos = (i + 1) as u32;
683            } else {
684                e.base_index_pos = 0;
685            }
686        }
687    }
688
689    let base_oid = if rebuild {
690        let shared_index = Index {
691            version: index.version,
692            entries: base_snapshot.clone(),
693            sparse_directories: false,
694            untracked_cache: None,
695            fsmonitor_last_update: None,
696            resolve_undo: None,
697            split_link: None,
698            cache_tree_root: None,
699            cache_tree: None,
700            hash_algo: index.hash_algo,
701        };
702        let tmp = match tempfile::NamedTempFile::new_in(git_dir) {
703            Ok(t) => t,
704            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
705                // Git: mks_tempfile_sm failure falls back to a unified index (no `link` extension).
706                index.split_link = None;
707                for e in &mut index.entries {
708                    e.base_index_pos = 0;
709                }
710                index.write_to_path(path, skip_hash)?;
711                adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
712                return Ok(());
713            }
714            Err(e) => return Err(Error::Io(e)),
715        };
716        let tmp_path = tmp.path().to_path_buf();
717        shared_index.write_to_path(&tmp_path, skip_hash)?;
718        adjust_shared_perm_file(&tmp_path, shared_repo).map_err(Error::Io)?;
719        let file_data = fs::read(&tmp_path).map_err(Error::Io)?;
720        let hash_len = index.hash_algo.len();
721        if file_data.len() < hash_len {
722            return Err(Error::IndexError("shared index temp too short".to_owned()));
723        }
724        let body = &file_data[..file_data.len() - hash_len];
725        let oid = hash_index_body(body, index.hash_algo);
726        let dest = git_dir.join(format!("sharedindex.{}", oid.to_hex()));
727        if let Err(e) = fs::rename(&tmp_path, &dest) {
728            if e.kind() == io::ErrorKind::PermissionDenied {
729                let _ = fs::remove_file(&tmp_path);
730                index.split_link = None;
731                for ent in &mut index.entries {
732                    ent.base_index_pos = 0;
733                }
734                index.write_to_path(path, skip_hash)?;
735                adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
736                return Ok(());
737            }
738            return Err(Error::Io(e));
739        }
740        clean_stale_shared_index_files(git_dir, &oid.to_hex(), cfg);
741        oid
742    } else {
743        let oid = index
744            .split_link
745            .as_ref()
746            .ok_or_else(|| {
747                Error::IndexError("split index missing base link during reuse".to_owned())
748            })?
749            .base_oid;
750        freshen_shared_index(&resolve_shared_index_file(git_dir, path, &oid));
751        oid
752    };
753
754    // Map each shared-index row to the merged entry that claims it (`ce->index`), like Git
755    // `prepare_to_write_split_index` (path must still match that row).
756    let mut merged_by_pos: Vec<Option<usize>> = vec![None; base_snapshot.len()];
757    for (p, e) in index.entries.iter().enumerate() {
758        if e.base_index_pos == 0 {
759            continue;
760        }
761        let i = e.base_index_pos.saturating_sub(1) as usize;
762        if i < base_snapshot.len()
763            && base_snapshot[i].path == e.path
764            && base_snapshot[i].stage() == e.stage()
765        {
766            merged_by_pos[i] = Some(p);
767        }
768    }
769
770    let mut delete_bm = EwahBitmap::new();
771    let mut replace_bm = EwahBitmap::new();
772    let mut main_entries: Vec<IndexEntry> = Vec::new();
773
774    for i in 0..base_snapshot.len() {
775        let b = &base_snapshot[i];
776        if let Some(p) = merged_by_pos[i] {
777            let c = &index.entries[p];
778            if entries_equal_for_split(b, c) {
779                continue;
780            }
781            replace_bm.set_bit_extend(i);
782            let mut stub = c.clone();
783            stub.path.clear();
784            stub.base_index_pos = 0;
785            main_entries.push(stub);
786        } else {
787            delete_bm.set_bit_extend(i);
788        }
789    }
790
791    for e in &index.entries {
792        if e.base_index_pos == 0 {
793            let mut c = e.clone();
794            c.base_index_pos = 0;
795            main_entries.push(c);
796            continue;
797        }
798        let i = e.base_index_pos.saturating_sub(1) as usize;
799        if i >= base_snapshot.len()
800            || base_snapshot[i].path != e.path
801            || base_snapshot[i].stage() != e.stage()
802        {
803            let mut c = e.clone();
804            c.base_index_pos = 0;
805            main_entries.push(c);
806            continue;
807        }
808        if entries_equal_for_split(&base_snapshot[i], e) {
809            continue;
810        }
811        // Replacement: stub already pushed above.
812    }
813
814    main_entries.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
815
816    let link = SplitIndexLink {
817        base_oid,
818        delete_bitmap: Some(delete_bm),
819        replace_bitmap: Some(replace_bm),
820    };
821
822    let out_index = Index {
823        version: index.version,
824        entries: main_entries,
825        sparse_directories: false,
826        untracked_cache: index.untracked_cache.clone(),
827        fsmonitor_last_update: index.fsmonitor_last_update.clone(),
828        resolve_undo: None,
829        split_link: Some(link),
830        cache_tree_root: index.cache_tree_root,
831        cache_tree: index.cache_tree.clone(),
832        hash_algo: index.hash_algo,
833    };
834
835    out_index.write_to_path(path, skip_hash)?;
836    adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
837
838    for e in &mut index.entries {
839        if let Some(pos) = find_entry_pos_sorted(&base_snapshot, &e.path, e.stage()) {
840            if entries_equal_for_split(&base_snapshot[pos], e) {
841                e.base_index_pos = (pos + 1) as u32;
842                continue;
843            }
844        }
845        e.base_index_pos = 0;
846    }
847
848    index.split_link = Some(SplitIndexLink {
849        base_oid,
850        delete_bitmap: None,
851        replace_bitmap: None,
852    });
853
854    Ok(())
855}
856
857/// Human-readable split-index dump for `test-tool dump-split-index`.
858/// If `index` has a split `link` extension, load the shared index and merge entries.
859pub fn resolve_split_index_if_needed(
860    index: &mut Index,
861    git_dir: &Path,
862    index_path: &Path,
863) -> Result<()> {
864    let Some(link) = index.split_link.clone() else {
865        return Ok(());
866    };
867    if link.base_oid.is_zero() {
868        return Ok(());
869    }
870    let base_oid = link.base_oid;
871    let shared_path = resolve_shared_index_file(git_dir, index_path, &base_oid);
872    let data = fs::read(&shared_path).map_err(|e| {
873        Error::IndexError(format!(
874            "split index: cannot read shared index {}: {e}",
875            shared_path.display()
876        ))
877    })?;
878    let hash_len = index.hash_algo.len();
879    if data.len() < hash_len {
880        return Err(Error::IndexError(
881            "split index: shared index too short".to_owned(),
882        ));
883    }
884    let body = &data[..data.len() - hash_len];
885    let got = hash_index_body(body, index.hash_algo);
886    if got != base_oid {
887        return Err(Error::IndexError(format!(
888            "broken index, expect {} in {}, got {}",
889            base_oid.to_hex(),
890            shared_path.display(),
891            got.to_hex()
892        )));
893    }
894    freshen_shared_index(&shared_path);
895    let base_entries = Index::parse(&data)?.entries;
896    merge_split_into_index(index, link, base_entries)?;
897    index.split_link = Some(SplitIndexLink {
898        base_oid,
899        delete_bitmap: None,
900        replace_bitmap: None,
901    });
902    Ok(())
903}
904
905/// Format output for `test-tool dump-split-index` (Git reads the index with `do_read_index` only,
906/// without merging the shared base — stubs and EWAH bitmaps stay intact).
907pub fn format_dump_split_index_file(data: &[u8], index: &Index) -> Result<String> {
908    use std::fmt::Write;
909    let hash_len = index.hash_algo.len();
910    if data.len() < hash_len {
911        return Err(Error::IndexError("index too short".to_owned()));
912    }
913    let body = &data[..data.len() - hash_len];
914    let trail = &data[data.len() - hash_len..];
915    let own = if trail.iter().all(|&b| b == 0) {
916        hash_index_body(body, index.hash_algo)
917    } else {
918        ObjectId::from_bytes(trail)?
919    };
920
921    let mut s = String::new();
922    writeln!(s, "own {}", own.to_hex()).map_err(|e| Error::IndexError(e.to_string()))?;
923    let Some(link) = &index.split_link else {
924        writeln!(s, "not a split index").map_err(|e| Error::IndexError(e.to_string()))?;
925        return Ok(s);
926    };
927    writeln!(s, "base {}", link.base_oid.to_hex()).map_err(|e| Error::IndexError(e.to_string()))?;
928    for e in &index.entries {
929        // Split-index replacement stubs use `CE_STRIP_NAME`: zero-length path on disk (Git still prints the line).
930        let path_disp = String::from_utf8_lossy(&e.path);
931        writeln!(
932            s,
933            "{:06o} {} {}\t{}",
934            e.mode,
935            e.oid.to_hex(),
936            e.stage(),
937            path_disp
938        )
939        .map_err(|e| Error::IndexError(e.to_string()))?;
940    }
941    write!(s, "replacements:").map_err(|e| Error::IndexError(e.to_string()))?;
942    if let Some(bm) = &link.replace_bitmap {
943        bm.each_set_bit(|pos| {
944            write!(s, " {}", pos).ok();
945        });
946    }
947    writeln!(s).map_err(|e| Error::IndexError(e.to_string()))?;
948    write!(s, "deletions:").map_err(|e| Error::IndexError(e.to_string()))?;
949    if let Some(bm) = &link.delete_bitmap {
950        bm.each_set_bit(|pos| {
951            write!(s, " {}", pos).ok();
952        });
953    }
954    writeln!(s).map_err(|e| Error::IndexError(e.to_string()))?;
955    Ok(s)
956}