Skip to main content

grit_lib/
split_index.rs

1//! Split index: `link` extension and `sharedindex.<sha1>` (Git `split-index.c`).
2
3use std::fs;
4use std::io;
5use std::path::{Path, PathBuf};
6
7use sha1::{Digest, Sha1};
8
9use crate::config::ConfigSet;
10use crate::error::{Error, Result};
11use crate::ewah_bitmap::EwahBitmap;
12use crate::git_date::approx::approxidate_careful;
13use crate::index::{Index, IndexEntry};
14use crate::objects::ObjectId;
15
16/// Split-index metadata carried on an [`Index`] (in-memory; bitmaps cleared after merge/write).
17#[derive(Debug, Clone)]
18pub(crate) struct SplitIndexLink {
19    /// OID of the shared index file (`sharedindex.<hex>`).
20    pub base_oid: ObjectId,
21    pub delete_bitmap: Option<EwahBitmap>,
22    pub replace_bitmap: Option<EwahBitmap>,
23}
24
/// Interpret a `core.sharedRepository` value (Git `git_config_perm`).
///
/// The result encodes how file modes are adjusted:
/// * `0` — leave permissions to the umask (`umask`, `0`, unset/empty, any false or unrecognised
///   value, and octal modes that do not grant the owner read+write);
/// * `0o660` / `0o664` — bits to OR into the mode (`group`, `1`, any true boolean / `all`,
///   `world`, `everybody`, `2`);
/// * a negative number `-(mode & 0o666)` — an explicit octal mode that replaces the permission
///   bits.
///
/// Boolean spellings follow Git's config rules, so `yes` and `on` behave like `true`.
fn parse_shared_repository_perm(raw: Option<&str>) -> i32 {
    const PERM_UMASK: i32 = 0;
    const OLD_PERM_GROUP: i32 = 1;
    const OLD_PERM_EVERYBODY: i32 = 2;
    const PERM_GROUP: i32 = 0o660;
    const PERM_EVERYBODY: i32 = 0o664;

    let Some(value) = raw.map(str::trim).filter(|s| !s.is_empty()) else {
        return PERM_UMASK;
    };
    let is = |word: &str| value.eq_ignore_ascii_case(word);

    if is("umask") {
        return PERM_UMASK;
    }
    if is("group") {
        return PERM_GROUP;
    }
    if is("all") || is("world") || is("everybody") {
        return PERM_EVERYBODY;
    }

    // Purely octal input: 0/1/2 are legacy aliases, anything else is a chmod-style mode.
    if value.bytes().all(|b| (b'0'..=b'7').contains(&b)) {
        // A value too long for i32 fails to parse and falls through to the boolean handling.
        if let Ok(mode) = i32::from_str_radix(value, 8) {
            return match mode {
                PERM_UMASK => PERM_UMASK,
                OLD_PERM_GROUP => PERM_GROUP,
                OLD_PERM_EVERYBODY => PERM_EVERYBODY,
                // The owner must keep read+write; otherwise ignore the setting.
                m if m & 0o600 != 0o600 => PERM_UMASK,
                // Negative marks "replace the permission bits"; execute bits are never taken
                // from the configuration.
                m => -(m & 0o666),
            };
        }
    }

    if is("true") || is("yes") || is("on") {
        PERM_GROUP
    } else {
        PERM_UMASK
    }
}
70
/// Compute the mode a file should have under a `core.sharedRepository` setting
/// (Git `calc_shared_perm`).
///
/// `shared_repo` is the value produced by `parse_shared_repository_perm`: positive values are
/// OR-ed into `mode`, negative values replace the low nine permission bits with their magnitude.
///
/// The adjustments are applied to the *tweak*, not to the combined mode, so bits the file
/// already has are never removed or widened in the additive case:
/// * if the owner cannot write, the tweak grants no write bits;
/// * if the owner can execute, each read bit of the tweak also grants the matching execute bit.
fn calc_shared_perm(shared_repo: i32, mode: u32) -> u32 {
    let mut tweak = shared_repo.unsigned_abs();

    if mode & 0o200 == 0 {
        tweak &= !0o222;
    }
    if mode & 0o100 != 0 {
        // Copy read bits to execute bits.
        tweak |= (tweak & 0o444) >> 2;
    }

    if shared_repo < 0 {
        (mode & !0o777) | tweak
    } else {
        mode | tweak
    }
}
93
94#[cfg(unix)]
95fn adjust_shared_perm_file(path: &Path, shared_repo: i32) -> io::Result<()> {
96    if shared_repo == 0 {
97        return Ok(());
98    }
99    use std::os::unix::fs::PermissionsExt;
100    let meta = fs::metadata(path)?;
101    let old = meta.permissions().mode();
102    let new_mode = calc_shared_perm(shared_repo, old);
103    if (old ^ new_mode) & 0o777 != 0 {
104        let mut p = meta.permissions();
105        p.set_mode(new_mode & 0o777);
106        fs::set_permissions(path, p)?;
107    }
108    Ok(())
109}
110
111#[cfg(not(unix))]
112fn adjust_shared_perm_file(_path: &Path, _shared_repo: i32) -> io::Result<()> {
113    Ok(())
114}
115
116/// Compare on-disk-relevant fields (Git `compare_ce_content` in `split-index.c`).
117pub(crate) fn entries_equal_for_split(a: &IndexEntry, b: &IndexEntry) -> bool {
118    let mask: u16 = 0xF000 | 0x8000;
119    let a_flags = a.flags & mask;
120    let b_flags = b.flags & mask;
121    let ext_mask: u16 = 0x7000;
122    let a_ext = a.flags_extended.unwrap_or(0) & ext_mask;
123    let b_ext = b.flags_extended.unwrap_or(0) & ext_mask;
124    a.ctime_sec == b.ctime_sec
125        && a.ctime_nsec == b.ctime_nsec
126        && a.mtime_sec == b.mtime_sec
127        && a.mtime_nsec == b.mtime_nsec
128        && a.dev == b.dev
129        && a.ino == b.ino
130        && a.mode == b.mode
131        && a.uid == b.uid
132        && a.gid == b.gid
133        && a.size == b.size
134        && a.oid == b.oid
135        && a_flags == b_flags
136        && a_ext == b_ext
137}
138
139fn replace_positions_in_order(link: &SplitIndexLink) -> Vec<usize> {
140    let Some(bm) = &link.replace_bitmap else {
141        return Vec::new();
142    };
143    if bm.bit_size == 0 {
144        return Vec::new();
145    }
146    let mut v = Vec::new();
147    bm.each_set_bit(|p| v.push(p));
148    v
149}
150
151fn bitmap_has_bit(bm: &EwahBitmap, i: usize) -> bool {
152    let mut found = false;
153    bm.each_set_bit(|pos| {
154        if pos == i {
155            found = true;
156        }
157    });
158    found
159}
160
161/// Merge split index + shared base into `index.entries` (Git `merge_base_index`).
162pub(crate) fn merge_split_into_index(
163    index: &mut Index,
164    link: SplitIndexLink,
165    base_entries: Vec<IndexEntry>,
166) -> Result<()> {
167    let saved = std::mem::take(&mut index.entries);
168    let replace_pos = replace_positions_in_order(&link);
169    let stubs: Vec<IndexEntry> = saved
170        .iter()
171        .filter(|e| e.path.is_empty())
172        .cloned()
173        .collect();
174    if stubs.len() != replace_pos.len() {
175        return Err(Error::IndexError(format!(
176            "split index: expected {} replacement stubs, found {}",
177            replace_pos.len(),
178            stubs.len()
179        )));
180    }
181    let mut stub_iter = stubs.into_iter();
182    let rest: Vec<IndexEntry> = saved.into_iter().filter(|e| !e.path.is_empty()).collect();
183
184    let delete = &link.delete_bitmap;
185    let replace = &link.replace_bitmap;
186
187    let mut merged: Vec<IndexEntry> = Vec::new();
188
189    for (i, mut base_e) in base_entries.into_iter().enumerate() {
190        if delete
191            .as_ref()
192            .is_some_and(|b| b.bit_size > 0 && bitmap_has_bit(b, i))
193        {
194            continue;
195        }
196        if replace
197            .as_ref()
198            .is_some_and(|b| b.bit_size > 0 && bitmap_has_bit(b, i))
199        {
200            let Some(rep) = stub_iter.next() else {
201                return Err(Error::IndexError(
202                    "split index: missing replacement entry".to_owned(),
203                ));
204            };
205            let mut e = rep;
206            e.path = base_e.path.clone();
207            e.base_index_pos = (i + 1) as u32;
208            merged.push(e);
209        } else {
210            base_e.base_index_pos = (i + 1) as u32;
211            merged.push(base_e);
212        }
213    }
214
215    if stub_iter.next().is_some() {
216        return Err(Error::IndexError(
217            "split index: too many replacement stubs".to_owned(),
218        ));
219    }
220
221    for mut e in rest {
222        e.base_index_pos = 0;
223        merged.push(e);
224    }
225
226    merged.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
227    index.entries = merged;
228    Ok(())
229}
230
231/// Parse the `link` extension payload (Git `read_link_extension`).
232pub(crate) fn parse_link_extension(data: &[u8]) -> Result<SplitIndexLink> {
233    if data.len() < 20 {
234        return Err(Error::IndexError(
235            "corrupt link extension (too short)".to_owned(),
236        ));
237    }
238    let base_oid = ObjectId::from_bytes(&data[..20])?;
239    let mut rest = &data[20..];
240    if rest.is_empty() {
241        return Ok(SplitIndexLink {
242            base_oid,
243            delete_bitmap: None,
244            replace_bitmap: None,
245        });
246    }
247    let Some((del, consumed)) = EwahBitmap::deserialize_prefix(rest) else {
248        return Err(Error::IndexError(
249            "corrupt delete bitmap in link extension".to_owned(),
250        ));
251    };
252    rest = &rest[consumed..];
253    let Some((rep, consumed2)) = EwahBitmap::deserialize_prefix(rest) else {
254        return Err(Error::IndexError(
255            "corrupt replace bitmap in link extension".to_owned(),
256        ));
257    };
258    rest = &rest[consumed2..];
259    if !rest.is_empty() {
260        return Err(Error::IndexError(
261            "garbage at the end of link extension".to_owned(),
262        ));
263    }
264    Ok(SplitIndexLink {
265        base_oid,
266        delete_bitmap: Some(del),
267        replace_bitmap: Some(rep),
268    })
269}
270
271/// Serialize `link` extension: base OID plus two EWAH bitmaps (Git always writes both after `prepare_to_write_split_index`).
272pub(crate) fn serialize_link_extension_payload(
273    base_oid: &ObjectId,
274    delete: &EwahBitmap,
275    replace: &EwahBitmap,
276) -> Vec<u8> {
277    let mut out = base_oid.as_bytes().to_vec();
278    delete.serialize(&mut out);
279    replace.serialize(&mut out);
280    out
281}
282
283/// Resolve path to shared index file (Git `read_index_from`), with fallbacks when `git_dir` does
284/// not match the repo that owns the index (nested trash repo + `GIT_INDEX_FILE`).
285fn resolve_shared_index_file(git_dir: &Path, index_path: &Path, base_oid: &ObjectId) -> PathBuf {
286    let name = format!("sharedindex.{}", base_oid.to_hex());
287    let primary = git_dir.join(&name);
288
289    let try_path = |p: PathBuf| -> Option<PathBuf> {
290        if p.is_file() {
291            Some(p)
292        } else {
293            None
294        }
295    };
296
297    if let Some(p) = try_path(primary.clone()) {
298        return p;
299    }
300    if let Some(parent) = index_path.parent() {
301        if let Some(p) = try_path(parent.join(&name)) {
302            return p;
303        }
304    }
305    if let Ok(cwd) = std::env::current_dir() {
306        let mut dir = cwd.as_path();
307        loop {
308            if let Some(p) = try_path(dir.join(".git").join(&name)) {
309                return p;
310            }
311            let Some(p) = dir.parent() else {
312                break;
313            };
314            dir = p;
315        }
316    }
317    if let Some(d) = index_path.parent() {
318        if let Ok(read) = fs::read_dir(d) {
319            for ent in read.flatten() {
320                let Ok(ft) = ent.file_type() else {
321                    continue;
322                };
323                if !ft.is_dir() {
324                    continue;
325                }
326                if let Some(p) = try_path(ent.path().join(".git").join(&name)) {
327                    return p;
328                }
329            }
330        }
331    }
332    primary
333}
334
335pub(crate) fn hash_index_body(body: &[u8]) -> ObjectId {
336    let mut hasher = Sha1::new();
337    hasher.update(body);
338    let digest = hasher.finalize();
339    ObjectId::from_bytes(digest.as_slice()).unwrap_or_else(|_| unreachable!("SHA-1 is 20 bytes"))
340}
341
/// Tri-state view of the `core.splitIndex` setting.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum SplitIndexConfig {
    /// The setting is present and false.
    Disabled,
    /// The setting is absent or could not be interpreted.
    Unset,
    /// The setting is present and true.
    Enabled,
}
348
349pub(crate) fn split_index_config(cfg: &ConfigSet) -> SplitIndexConfig {
350    match cfg.get("core.splitIndex") {
351        None => SplitIndexConfig::Unset,
352        Some(v) => {
353            let t = v.trim();
354            if t.eq_ignore_ascii_case("false") || t == "0" {
355                SplitIndexConfig::Disabled
356            } else if t.eq_ignore_ascii_case("true") || t == "1" {
357                SplitIndexConfig::Enabled
358            } else {
359                SplitIndexConfig::Unset
360            }
361        }
362    }
363}
364
365pub(crate) fn max_percent_split_change(cfg: &ConfigSet) -> i32 {
366    match cfg.get("splitIndex.maxPercentChange") {
367        None => -1,
368        Some(v) => v.trim().parse::<i32>().unwrap_or(-1),
369    }
370}
371
/// Percentage of non-shared entries tolerated before the shared index is rebuilt, used when
/// `splitIndex.maxPercentChange` does not provide a value.
fn default_max_percent() -> i32 {
    const DEFAULT_MAX_PERCENT_SPLIT_CHANGE: i32 = 20;
    DEFAULT_MAX_PERCENT_SPLIT_CHANGE
}
375
376pub(crate) fn should_rebuild_shared_index(index: &Index, cfg: &ConfigSet) -> bool {
377    let max_split = max_percent_split_change(cfg);
378    let max_split = match max_split {
379        -1 => default_max_percent(),
380        0 => return true,
381        100 => return false,
382        n => n,
383    };
384    let mut not_shared = 0u64;
385    for e in &index.entries {
386        if e.base_index_pos == 0 {
387            not_shared += 1;
388        }
389    }
390    let total = index.entries.len() as u64;
391    if total == 0 {
392        return false;
393    }
394    total * (max_split as u64) < not_shared * 100
395}
396
/// Whether the `GIT_TEST_SPLIT_INDEX` environment variable requests split-index mode.
///
/// The trimmed value is truthy when it is `true`, `yes` or `on` (case-insensitive) or a
/// non-zero integer, matching Git's boolean spellings. Unset, non-Unicode, or any other value
/// is false.
pub(crate) fn git_test_split_index_env() -> bool {
    let Ok(raw) = std::env::var("GIT_TEST_SPLIT_INDEX") else {
        return false;
    };
    let value = raw.trim();
    ["true", "yes", "on"]
        .iter()
        .any(|word| value.eq_ignore_ascii_case(word))
        || value.parse::<i64>().map_or(false, |n| n != 0)
}
406
/// Whether cache-tree verification should run on index write.
///
/// Upstream's `write_locked_index` gates this on `git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)`,
/// but the upstream test harness (`test-lib.sh`) exports the variable as `true` by default, so
/// in practice the check is *on* unless a test explicitly sets it to a falsy value. Grit
/// mirrors that effective default: verification runs unless `GIT_TEST_CHECK_CACHE_TREE` is
/// explicitly falsy (`0`/`false`/`no`/`off`/empty, case-insensitive, after trimming). An unset
/// or non-Unicode variable enables the check.
///
/// This only ever rejects a genuinely corrupt cache-tree (e.g. one primed from a tree with
/// duplicate path entries, as in `t4058-diff-duplicates`); well-formed trees always verify
/// cleanly.
pub(crate) fn git_test_check_cache_tree() -> bool {
    let Ok(raw) = std::env::var("GIT_TEST_CHECK_CACHE_TREE") else {
        return true;
    };
    let value = raw.trim();
    let falsy = value.is_empty()
        || value == "0"
        || ["false", "no", "off"]
            .iter()
            .any(|word| value.eq_ignore_ascii_case(word));
    !falsy
}
428
429pub(crate) fn git_test_split_index_force_reorder(base_oid: &ObjectId) -> bool {
430    git_test_split_index_env() && (base_oid.as_bytes()[0] & 15) < 6
431}
432
433pub(crate) fn shared_index_expire_threshold(cfg: &ConfigSet) -> u64 {
434    let raw = cfg
435        .get("splitIndex.sharedIndexExpire")
436        .map(|s| s.trim().to_owned());
437    let spec = raw
438        .as_deref()
439        .filter(|s| !s.is_empty())
440        .unwrap_or("2.weeks.ago");
441    if spec.eq_ignore_ascii_case("never") {
442        return 0;
443    }
444    let mut err = 0;
445    approxidate_careful(spec, Some(&mut err))
446}
447
/// Whether the shared index file at `path` is old enough to be removed.
///
/// An `expiration` of 0 means "never expire". A file that cannot be stat'ed is kept. On Unix
/// the file qualifies when its mtime is at or before `expiration`; on other platforms nothing
/// is ever deleted.
fn should_delete_shared_index(path: &Path, expiration: u64) -> bool {
    if expiration == 0 {
        return false;
    }
    let meta = match fs::metadata(path) {
        Ok(meta) => meta,
        Err(_) => return false,
    };
    #[cfg(unix)]
    {
        use std::os::unix::fs::MetadataExt;
        (meta.mtime() as u64) <= expiration
    }
    #[cfg(not(unix))]
    {
        let _ = meta;
        false
    }
}
466
467pub(crate) fn clean_stale_shared_index_files(git_dir: &Path, current_hex: &str, cfg: &ConfigSet) {
468    let expiration = shared_index_expire_threshold(cfg);
469    let Ok(read_dir) = fs::read_dir(git_dir) else {
470        return;
471    };
472    for ent in read_dir.flatten() {
473        let name = ent.file_name();
474        let Some(name) = name.to_str() else {
475            continue;
476        };
477        let Some(hex) = name.strip_prefix("sharedindex.") else {
478            continue;
479        };
480        if hex == current_hex {
481            continue;
482        }
483        let path = ent.path();
484        if should_delete_shared_index(&path, expiration) {
485            let _ = fs::remove_file(&path);
486        }
487    }
488}
489
490pub(crate) fn freshen_shared_index(path: &Path) {
491    let _ = filetime_set_to_now(path);
492}
493
494#[cfg(unix)]
495fn filetime_set_to_now(path: &Path) -> io::Result<()> {
496    use std::time::SystemTime;
497    let t = SystemTime::now();
498    let ft = filetime::FileTime::from_system_time(t);
499    filetime::set_file_mtime(path, ft)
500}
501
502#[cfg(not(unix))]
503fn filetime_set_to_now(_path: &Path) -> io::Result<()> {
504    Ok(())
505}
506
/// What `update-index` asked for regarding split-index format on the next index write.
#[derive(Clone, Copy, Debug, Default)]
pub struct WriteSplitIndexRequest {
    /// `Some(true)` for `--split-index`, `Some(false)` for `--no-split-index`; `None` leaves
    /// the decision to config / test environment.
    pub explicit: Option<bool>,
}
513
514impl WriteSplitIndexRequest {
515    /// Whether the next write should use split-index format.
516    ///
517    /// Matches Git: `--split-index` still enables split index when `core.splitIndex` is false,
518    /// but emits a warning (see `builtin/update-index.c`).
519    ///
520    /// When `explicit` is `None`, an index that was already split (`split_link` set after load)
521    /// stays split until `--no-split-index` (Git keeps `istate->split_index` across commands).
522    pub fn want_write_split(self, cfg: &ConfigSet, index: &Index) -> bool {
523        match self.explicit {
524            Some(false) => {
525                if matches!(split_index_config(cfg), SplitIndexConfig::Enabled) {
526                    eprintln!(
527                        "warning: core.splitIndex is set to true; remove or change it, if you really want to disable split index"
528                    );
529                }
530                false
531            }
532            Some(true) => {
533                if matches!(split_index_config(cfg), SplitIndexConfig::Disabled) {
534                    eprintln!(
535                        "warning: core.splitIndex is set to false; remove or change it, if you really want to enable split index"
536                    );
537                }
538                true
539            }
540            None => {
541                if matches!(split_index_config(cfg), SplitIndexConfig::Disabled) {
542                    return false;
543                }
544                index.split_link.is_some()
545                    || matches!(split_index_config(cfg), SplitIndexConfig::Enabled)
546                    || git_test_split_index_env()
547            }
548        }
549    }
550}
551
552fn find_entry_pos_sorted(entries: &[IndexEntry], path: &[u8], stage: u8) -> Option<usize> {
553    entries
554        .binary_search_by(|e| {
555            e.path
556                .as_slice()
557                .cmp(path)
558                .then_with(|| e.stage().cmp(&stage))
559        })
560        .ok()
561}
562
563fn load_shared_entries(
564    git_dir: &Path,
565    index_path: &Path,
566    base_oid: &ObjectId,
567) -> Result<Vec<IndexEntry>> {
568    let p = resolve_shared_index_file(git_dir, index_path, base_oid);
569    let data = fs::read(&p).map_err(Error::Io)?;
570    let mut shared = Index::parse(&data)?;
571    for (i, e) in shared.entries.iter_mut().enumerate() {
572        e.base_index_pos = (i + 1) as u32;
573    }
574    Ok(shared.entries)
575}
576
577/// Write split index to `path` under `git_dir`, updating `index` base positions and `split_link`.
578pub(crate) fn write_index_file_split(
579    path: &Path,
580    git_dir: &Path,
581    index: &mut Index,
582    cfg: &ConfigSet,
583    request: WriteSplitIndexRequest,
584    skip_hash: bool,
585) -> Result<()> {
586    // Mirror upstream `write_locked_index`: under GIT_TEST_CHECK_CACHE_TREE, verify the cache-tree
587    // against the index before persisting. A duplicate-entry tree (t4058) produces a cache-tree
588    // whose entry counts exceed the deduplicated index, which must abort the write with the
589    // canonical "corrupted cache-tree" error rather than silently writing a broken index.
590    if git_test_check_cache_tree() {
591        crate::write_tree::verify_cache_tree(index)?;
592    }
593
594    let want_split = request.want_write_split(cfg, index);
595
596    let shared_repo = parse_shared_repository_perm(cfg.get("core.sharedRepository").as_deref());
597
598    if !want_split {
599        index.split_link = None;
600        for e in &mut index.entries {
601            e.base_index_pos = 0;
602        }
603        index.write_to_path(path, skip_hash)?;
604        adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
605        return Ok(());
606    }
607
608    // Git `alternate_index_output`: split index is only written to the repository's primary index
609    // file (`$GIT_DIR/index`). `GIT_INDEX_FILE` pointing elsewhere gets a unified index (t1700 #25).
610    let default_index = git_dir.join("index");
611    let is_primary_index = path == default_index
612        || path
613            .canonicalize()
614            .ok()
615            .zip(default_index.canonicalize().ok())
616            .is_some_and(|(a, b)| a == b);
617    if !is_primary_index {
618        index.split_link = None;
619        for e in &mut index.entries {
620            e.base_index_pos = 0;
621        }
622        index.write_to_path(path, skip_hash)?;
623        adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
624        return Ok(());
625    }
626
627    if index.sparse_directories {
628        return Err(Error::IndexError(
629            "cannot write split index for a sparse index".to_owned(),
630        ));
631    }
632
633    let prev_base = index
634        .split_link
635        .as_ref()
636        .map(|l| l.base_oid)
637        .unwrap_or(ObjectId::zero());
638
639    let mut rebuild = index.split_link.is_none()
640        || should_rebuild_shared_index(index, cfg)
641        || git_test_split_index_force_reorder(&prev_base);
642
643    if git_test_split_index_env() && index.split_link.is_none() {
644        rebuild = true;
645    }
646
647    let base_snapshot: Vec<IndexEntry> = if rebuild {
648        let mut v: Vec<IndexEntry> = index.entries.to_vec();
649        v.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
650        for (i, e) in v.iter_mut().enumerate() {
651            e.base_index_pos = (i + 1) as u32;
652        }
653        v
654    } else {
655        let link = index.split_link.as_ref().ok_or_else(|| {
656            Error::IndexError("split index missing base link during reuse".to_owned())
657        })?;
658        load_shared_entries(git_dir, path, &link.base_oid)?
659    };
660
661    // After a shared-index rebuild, `base_snapshot` matches the merged index exactly; align indices
662    // (e.g. `--no-split-index` then `--split-index`). When reusing an on-disk shared file, do not
663    // remap by path — deleted paths can still exist in the shared index until expiry/rebuild, and
664    // re-adding the same path must stay split-only (`base_index_pos` 0) like Git.
665    if rebuild {
666        for e in &mut index.entries {
667            if let Some(i) = base_snapshot
668                .iter()
669                .position(|b| b.path == e.path && b.stage() == e.stage())
670            {
671                e.base_index_pos = (i + 1) as u32;
672            } else {
673                e.base_index_pos = 0;
674            }
675        }
676    }
677
678    let base_oid = if rebuild {
679        let shared_index = Index {
680            version: index.version,
681            entries: base_snapshot.clone(),
682            sparse_directories: false,
683            untracked_cache: None,
684            fsmonitor_last_update: None,
685            resolve_undo: None,
686            split_link: None,
687            cache_tree_root: None,
688            cache_tree: None,
689        };
690        let tmp = match tempfile::NamedTempFile::new_in(git_dir) {
691            Ok(t) => t,
692            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
693                // Git: mks_tempfile_sm failure falls back to a unified index (no `link` extension).
694                index.split_link = None;
695                for e in &mut index.entries {
696                    e.base_index_pos = 0;
697                }
698                index.write_to_path(path, skip_hash)?;
699                adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
700                return Ok(());
701            }
702            Err(e) => return Err(Error::Io(e)),
703        };
704        let tmp_path = tmp.path().to_path_buf();
705        shared_index.write_to_path(&tmp_path, skip_hash)?;
706        adjust_shared_perm_file(&tmp_path, shared_repo).map_err(Error::Io)?;
707        let file_data = fs::read(&tmp_path).map_err(Error::Io)?;
708        if file_data.len() < 20 {
709            return Err(Error::IndexError("shared index temp too short".to_owned()));
710        }
711        let body = &file_data[..file_data.len() - 20];
712        let oid = hash_index_body(body);
713        let dest = git_dir.join(format!("sharedindex.{}", oid.to_hex()));
714        if let Err(e) = fs::rename(&tmp_path, &dest) {
715            if e.kind() == io::ErrorKind::PermissionDenied {
716                let _ = fs::remove_file(&tmp_path);
717                index.split_link = None;
718                for ent in &mut index.entries {
719                    ent.base_index_pos = 0;
720                }
721                index.write_to_path(path, skip_hash)?;
722                adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
723                return Ok(());
724            }
725            return Err(Error::Io(e));
726        }
727        clean_stale_shared_index_files(git_dir, &oid.to_hex(), cfg);
728        oid
729    } else {
730        let oid = index
731            .split_link
732            .as_ref()
733            .ok_or_else(|| {
734                Error::IndexError("split index missing base link during reuse".to_owned())
735            })?
736            .base_oid;
737        freshen_shared_index(&resolve_shared_index_file(git_dir, path, &oid));
738        oid
739    };
740
741    // Map each shared-index row to the merged entry that claims it (`ce->index`), like Git
742    // `prepare_to_write_split_index` (path must still match that row).
743    let mut merged_by_pos: Vec<Option<usize>> = vec![None; base_snapshot.len()];
744    for (p, e) in index.entries.iter().enumerate() {
745        if e.base_index_pos == 0 {
746            continue;
747        }
748        let i = e.base_index_pos.saturating_sub(1) as usize;
749        if i < base_snapshot.len()
750            && base_snapshot[i].path == e.path
751            && base_snapshot[i].stage() == e.stage()
752        {
753            merged_by_pos[i] = Some(p);
754        }
755    }
756
757    let mut delete_bm = EwahBitmap::new();
758    let mut replace_bm = EwahBitmap::new();
759    let mut main_entries: Vec<IndexEntry> = Vec::new();
760
761    for i in 0..base_snapshot.len() {
762        let b = &base_snapshot[i];
763        if let Some(p) = merged_by_pos[i] {
764            let c = &index.entries[p];
765            if entries_equal_for_split(b, c) {
766                continue;
767            }
768            replace_bm.set_bit_extend(i);
769            let mut stub = c.clone();
770            stub.path.clear();
771            stub.base_index_pos = 0;
772            main_entries.push(stub);
773        } else {
774            delete_bm.set_bit_extend(i);
775        }
776    }
777
778    for e in &index.entries {
779        if e.base_index_pos == 0 {
780            let mut c = e.clone();
781            c.base_index_pos = 0;
782            main_entries.push(c);
783            continue;
784        }
785        let i = e.base_index_pos.saturating_sub(1) as usize;
786        if i >= base_snapshot.len()
787            || base_snapshot[i].path != e.path
788            || base_snapshot[i].stage() != e.stage()
789        {
790            let mut c = e.clone();
791            c.base_index_pos = 0;
792            main_entries.push(c);
793            continue;
794        }
795        if entries_equal_for_split(&base_snapshot[i], e) {
796            continue;
797        }
798        // Replacement: stub already pushed above.
799    }
800
801    main_entries.sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
802
803    let link = SplitIndexLink {
804        base_oid,
805        delete_bitmap: Some(delete_bm),
806        replace_bitmap: Some(replace_bm),
807    };
808
809    let out_index = Index {
810        version: index.version,
811        entries: main_entries,
812        sparse_directories: false,
813        untracked_cache: index.untracked_cache.clone(),
814        fsmonitor_last_update: index.fsmonitor_last_update.clone(),
815        resolve_undo: None,
816        split_link: Some(link),
817        cache_tree_root: index.cache_tree_root,
818        cache_tree: index.cache_tree.clone(),
819    };
820
821    out_index.write_to_path(path, skip_hash)?;
822    adjust_shared_perm_file(path, shared_repo).map_err(Error::Io)?;
823
824    for e in &mut index.entries {
825        if let Some(pos) = find_entry_pos_sorted(&base_snapshot, &e.path, e.stage()) {
826            if entries_equal_for_split(&base_snapshot[pos], e) {
827                e.base_index_pos = (pos + 1) as u32;
828                continue;
829            }
830        }
831        e.base_index_pos = 0;
832    }
833
834    index.split_link = Some(SplitIndexLink {
835        base_oid,
836        delete_bitmap: None,
837        replace_bitmap: None,
838    });
839
840    Ok(())
841}
842
843/// Human-readable split-index dump for `test-tool dump-split-index`.
844/// If `index` has a split `link` extension, load the shared index and merge entries.
845pub fn resolve_split_index_if_needed(
846    index: &mut Index,
847    git_dir: &Path,
848    index_path: &Path,
849) -> Result<()> {
850    let Some(link) = index.split_link.clone() else {
851        return Ok(());
852    };
853    if link.base_oid.is_zero() {
854        return Ok(());
855    }
856    let base_oid = link.base_oid;
857    let shared_path = resolve_shared_index_file(git_dir, index_path, &base_oid);
858    let data = fs::read(&shared_path).map_err(|e| {
859        Error::IndexError(format!(
860            "split index: cannot read shared index {}: {e}",
861            shared_path.display()
862        ))
863    })?;
864    if data.len() < 20 {
865        return Err(Error::IndexError(
866            "split index: shared index too short".to_owned(),
867        ));
868    }
869    let body = &data[..data.len() - 20];
870    let got = hash_index_body(body);
871    if got != base_oid {
872        return Err(Error::IndexError(format!(
873            "broken index, expect {} in {}, got {}",
874            base_oid.to_hex(),
875            shared_path.display(),
876            got.to_hex()
877        )));
878    }
879    freshen_shared_index(&shared_path);
880    let base_entries = Index::parse(&data)?.entries;
881    merge_split_into_index(index, link, base_entries)?;
882    index.split_link = Some(SplitIndexLink {
883        base_oid,
884        delete_bitmap: None,
885        replace_bitmap: None,
886    });
887    Ok(())
888}
889
890/// Format output for `test-tool dump-split-index` (Git reads the index with `do_read_index` only,
891/// without merging the shared base — stubs and EWAH bitmaps stay intact).
892pub fn format_dump_split_index_file(data: &[u8], index: &Index) -> Result<String> {
893    use std::fmt::Write;
894    if data.len() < 20 {
895        return Err(Error::IndexError("index too short".to_owned()));
896    }
897    let body = &data[..data.len() - 20];
898    let trail = &data[data.len() - 20..];
899    let own = if trail.iter().all(|&b| b == 0) {
900        hash_index_body(body)
901    } else {
902        ObjectId::from_bytes(trail)?
903    };
904
905    let mut s = String::new();
906    writeln!(s, "own {}", own.to_hex()).map_err(|e| Error::IndexError(e.to_string()))?;
907    let Some(link) = &index.split_link else {
908        writeln!(s, "not a split index").map_err(|e| Error::IndexError(e.to_string()))?;
909        return Ok(s);
910    };
911    writeln!(s, "base {}", link.base_oid.to_hex()).map_err(|e| Error::IndexError(e.to_string()))?;
912    for e in &index.entries {
913        // Split-index replacement stubs use `CE_STRIP_NAME`: zero-length path on disk (Git still prints the line).
914        let path_disp = String::from_utf8_lossy(&e.path);
915        writeln!(
916            s,
917            "{:06o} {} {}\t{}",
918            e.mode,
919            e.oid.to_hex(),
920            e.stage(),
921            path_disp
922        )
923        .map_err(|e| Error::IndexError(e.to_string()))?;
924    }
925    write!(s, "replacements:").map_err(|e| Error::IndexError(e.to_string()))?;
926    if let Some(bm) = &link.replace_bitmap {
927        bm.each_set_bit(|pos| {
928            write!(s, " {}", pos).ok();
929        });
930    }
931    writeln!(s).map_err(|e| Error::IndexError(e.to_string()))?;
932    write!(s, "deletions:").map_err(|e| Error::IndexError(e.to_string()))?;
933    if let Some(bm) = &link.delete_bitmap {
934        bm.each_set_bit(|pos| {
935            write!(s, " {}", pos).ok();
936        });
937    }
938    writeln!(s).map_err(|e| Error::IndexError(e.to_string()))?;
939    Ok(s)
940}