Skip to main content

grit_lib/
fetch_submodules.rs

1//! Logic for `git fetch --recurse-submodules` (changed-submodule detection and config).
2//!
3//! Mirrors the subset of Git's `submodule.c` / `submodule-config.c` needed for recursive fetch:
4//! revision walking with merge-aware gitlink diffs, per-submodule recurse mode, and checking
5//! whether recorded gitlink commits are already present in a submodule repository.
6
7use std::collections::{HashMap, HashSet};
8use std::fs;
9use std::path::{Path, PathBuf};
10
11use crate::combined_tree_diff::{combined_diff_paths_filtered, CombinedTreeDiffOptions};
12use crate::config::{parse_bool as config_parse_bool, ConfigFile, ConfigScope, ConfigSet};
13use crate::diff::{diff_trees, DiffStatus};
14use crate::error::Result;
15use crate::index::MODE_GITLINK;
16use crate::merge_diff::blob_oid_at_path;
17use crate::objects::{parse_commit, ObjectId, ObjectKind};
18use crate::odb::Odb;
19use crate::refs;
20use crate::repo::Repository;
21use crate::rev_list::{rev_list, RevListOptions};
22use crate::submodule_gitdir::submodule_modules_git_dir;
23
/// How `git fetch` should treat submodules, as selected by `fetch.recurseSubmodules`
/// or `--recurse-submodules[=<value>]`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FetchRecurseSubmodules {
    /// No explicit choice: fall back to submodule / `.gitmodules` defaults
    /// (Git `RECURSE_SUBMODULES_DEFAULT`).
    Default,
    /// Recursion is disabled.
    Off,
    /// Recurse into every configured submodule (`yes` / `true`).
    On,
    /// Recurse only when the superproject fetch brought in new submodule commits.
    OnDemand,
}
36
37/// Parse `fetch.recurseSubmodules` or `--recurse-submodules=<value>` (Git `parse_fetch_recurse_submodules_arg`).
38/// Build the positive OID list for `rev-list` when diffing fetched history (Git `ref_tips_after` ∪ submodule tips).
39pub fn merge_tips_for_changed_walk(
40    submodule_commits: &[ObjectId],
41    tips_after: &[ObjectId],
42) -> Vec<String> {
43    let mut seen: HashSet<ObjectId> = HashSet::new();
44    let mut out: Vec<String> = Vec::new();
45    for o in submodule_commits {
46        if seen.insert(*o) {
47            out.push(o.to_hex());
48        }
49    }
50    for o in tips_after {
51        if seen.insert(*o) {
52            out.push(o.to_hex());
53        }
54    }
55    out
56}
57
58pub fn parse_fetch_recurse_submodules_arg(
59    opt: &str,
60    arg: &str,
61) -> std::result::Result<FetchRecurseSubmodules, String> {
62    let arg = arg.trim();
63    if arg.is_empty() {
64        return Err(format!("option `{opt}` requires a value"));
65    }
66    match config_parse_bool(arg) {
67        Ok(true) => Ok(FetchRecurseSubmodules::On),
68        Ok(false) => Ok(FetchRecurseSubmodules::Off),
69        Err(_) => {
70            if arg.eq_ignore_ascii_case("on-demand") {
71                Ok(FetchRecurseSubmodules::OnDemand)
72            } else if arg.eq_ignore_ascii_case("no") || arg.eq_ignore_ascii_case("false") {
73                Ok(FetchRecurseSubmodules::Off)
74            } else {
75                Err(format!("bad {opt} argument: {arg}"))
76            }
77        }
78    }
79}
80
81/// One submodule that gained new gitlink targets in `rev-list <tips> --not <neg>`.
82#[derive(Debug, Clone)]
83pub struct ChangedSubmoduleFetch {
84    /// Submodule name (`.gitmodules` key or worktree path for unconfigured gitlinks).
85    pub name: String,
86    /// Path in the superproject tree.
87    pub path: String,
88    /// A superproject commit OID whose tree supplies `.gitmodules` / config context.
89    pub super_oid: ObjectId,
90    /// New gitlink commit OIDs observed along the walk (unique, sorted).
91    pub new_commits: Vec<ObjectId>,
92}
93
/// Interpret `mode_str` as an octal file mode (for example `"160000"`).
///
/// Returns `None` when the text is not a valid base-8 `u32`.
fn mode_from_octal(mode_str: &str) -> Option<u32> {
    let parsed = u32::from_str_radix(mode_str, 8);
    parsed.ok()
}
97
98fn is_gitlink_mode(mode_str: &str) -> bool {
99    mode_from_octal(mode_str) == Some(MODE_GITLINK)
100}
101
102/// Map `submodule.<name>.path` -> name from a `.gitmodules` file body.
103fn path_to_submodule_name(gitmodules_text: &str) -> HashMap<String, String> {
104    let Ok(cfg) = ConfigFile::parse(
105        Path::new(".gitmodules"),
106        gitmodules_text,
107        ConfigScope::Local,
108    ) else {
109        return HashMap::new();
110    };
111    let mut name_to_path: HashMap<String, String> = HashMap::new();
112    for e in &cfg.entries {
113        let key = &e.key;
114        if !key.starts_with("submodule.") {
115            continue;
116        }
117        let rest = &key["submodule.".len()..];
118        let Some(last_dot) = rest.rfind('.') else {
119            continue;
120        };
121        let name = rest[..last_dot].to_string();
122        let var = &rest[last_dot + 1..];
123        if var == "path" {
124            if let Some(p) = e.value.as_deref() {
125                name_to_path.insert(name, p.to_string());
126            }
127        }
128    }
129    name_to_path
130        .into_iter()
131        .map(|(name, path)| (path, name))
132        .collect()
133}
134
135fn gitmodules_blob_text(odb: &Odb, commit_tree: &ObjectId) -> Option<String> {
136    let oid = blob_oid_at_path(odb, commit_tree, ".gitmodules")?;
137    let obj = odb.read(&oid).ok()?;
138    if obj.kind != ObjectKind::Blob {
139        return None;
140    }
141    String::from_utf8(obj.data).ok()
142}
143
144fn resolve_submodule_name_for_path(
145    odb: &Odb,
146    commit_tree: &ObjectId,
147    path: &str,
148    super_work_tree: Option<&Path>,
149) -> Option<String> {
150    if let Some(text) = gitmodules_blob_text(odb, commit_tree) {
151        let m = path_to_submodule_name(&text);
152        if let Some(n) = m.get(path) {
153            return Some(n.clone());
154        }
155    }
156    let wt_path = super_work_tree?.join(path);
157    if wt_path.join(".git").exists() {
158        return Some(path.to_string());
159    }
160    None
161}
162
163/// Walk `rev_list(repo, positive_hex, negative_hex)` and collect submodule names whose gitlink
164/// targets changed, matching Git's `collect_changed_submodules` + name resolution.
165pub fn collect_changed_submodules_for_fetch(
166    repo: &Repository,
167    positive_hex: &[String],
168    negative_hex: &[String],
169) -> Result<Vec<ChangedSubmoduleFetch>> {
170    if positive_hex.is_empty() {
171        return Ok(Vec::new());
172    }
173    let options = RevListOptions::default();
174    let walked = rev_list(repo, positive_hex, negative_hex, &options)?;
175    let odb = &repo.odb;
176    let walk_opts = CombinedTreeDiffOptions {
177        recursive: true,
178        tree_in_recursive: false,
179    };
180    let super_wt = repo.work_tree.as_deref();
181
182    let mut by_name: HashMap<String, ChangedSubmoduleFetch> = HashMap::new();
183
184    for commit_oid in walked.commits {
185        let obj = odb.read(&commit_oid)?;
186        if obj.kind != ObjectKind::Commit {
187            continue;
188        }
189        let commit = parse_commit(&obj.data)?;
190        let parents = commit.parents;
191
192        let mut record_gitlink =
193            |path: String, oid: ObjectId, super_tree: &ObjectId| -> Result<()> {
194                let Some(name) = resolve_submodule_name_for_path(odb, super_tree, &path, super_wt)
195                else {
196                    return Ok(());
197                };
198                by_name
199                    .entry(name.clone())
200                    .and_modify(|e| {
201                        if !e.new_commits.contains(&oid) {
202                            e.new_commits.push(oid);
203                        }
204                    })
205                    .or_insert_with(|| ChangedSubmoduleFetch {
206                        name,
207                        path: path.clone(),
208                        super_oid: commit_oid,
209                        new_commits: vec![oid],
210                    });
211                Ok(())
212            };
213
214        if parents.is_empty() {
215            let entries = diff_trees(odb, None, Some(&commit.tree), "")?;
216            for e in entries {
217                if !is_gitlink_mode(&e.new_mode) {
218                    continue;
219                }
220                record_gitlink(e.path().to_string(), e.new_oid, &commit.tree)?;
221            }
222        } else if parents.len() == 1 {
223            let pobj = odb.read(&parents[0])?;
224            if pobj.kind != ObjectKind::Commit {
225                continue;
226            }
227            let parent = parse_commit(&pobj.data)?;
228            let entries = diff_trees(odb, Some(&parent.tree), Some(&commit.tree), "")?;
229            for e in entries {
230                if !matches!(
231                    e.status,
232                    DiffStatus::Added
233                        | DiffStatus::Modified
234                        | DiffStatus::TypeChanged
235                        | DiffStatus::Renamed
236                ) {
237                    continue;
238                }
239                let (mode, oid) = match e.status {
240                    DiffStatus::Deleted => continue,
241                    _ => (&e.new_mode, e.new_oid),
242                };
243                if !is_gitlink_mode(mode) {
244                    continue;
245                }
246                let path = e
247                    .new_path
248                    .as_deref()
249                    .or(e.old_path.as_deref())
250                    .unwrap_or("")
251                    .to_string();
252                if path.is_empty() {
253                    continue;
254                }
255                record_gitlink(path, oid, &commit.tree)?;
256            }
257        } else {
258            let paths =
259                combined_diff_paths_filtered(odb, &commit.tree, &parents, &walk_opts, None)?;
260            for p in paths {
261                if (p.merge_mode & 0o170000) != MODE_GITLINK {
262                    continue;
263                }
264                if p.merge_oid.is_zero() {
265                    continue;
266                }
267                record_gitlink(p.path, p.merge_oid, &commit.tree)?;
268            }
269        }
270    }
271
272    let mut out: Vec<ChangedSubmoduleFetch> = by_name.into_values().collect();
273    for e in &mut out {
274        e.new_commits.sort();
275        e.new_commits.dedup();
276    }
277    out.sort_by(|a, b| a.name.cmp(&b.name));
278    Ok(out)
279}
280
281/// True when every OID in `commits` exists as a commit object in `sub_odb` and is reachable from
282/// some ref (`git rev-list -n 1 <oids> --not --all` is empty), matching Git's `submodule_has_commits`.
283pub fn submodule_has_all_commits(sub_odb: &Odb, commits: &[ObjectId]) -> Result<bool> {
284    for oid in commits {
285        let obj = match sub_odb.read(oid) {
286            Ok(o) => o,
287            Err(_) => return Ok(false),
288        };
289        if obj.kind != ObjectKind::Commit {
290            return Ok(false);
291        }
292    }
293    if commits.is_empty() {
294        return Ok(true);
295    }
296    let repo_dir = sub_odb
297        .objects_dir()
298        .parent()
299        .unwrap_or_else(|| sub_odb.objects_dir());
300    let all_refs = refs::list_refs(repo_dir, "refs/")?;
301    let mut reachable: HashSet<ObjectId> = HashSet::new();
302    for (_, r_oid) in all_refs {
303        let mut stack = vec![r_oid];
304        while let Some(c) = stack.pop() {
305            if !reachable.insert(c) {
306                continue;
307            }
308            let Ok(obj) = sub_odb.read(&c) else {
309                continue;
310            };
311            if obj.kind != ObjectKind::Commit {
312                continue;
313            }
314            let Ok(parsed) = parse_commit(&obj.data) else {
315                continue;
316            };
317            for p in parsed.parents {
318                stack.push(p);
319            }
320        }
321    }
322    Ok(commits.iter().all(|c| reachable.contains(c)))
323}
324
325/// Whether a submodule at `path` is active for fetch at `super_oid` (Git `is_tree_submodule_active` subset).
326pub fn is_submodule_active_for_fetch(
327    _repo: &Repository,
328    config: &ConfigSet,
329    _super_tree_oid: &ObjectId,
330    _path: &str,
331    submodule_name: &str,
332) -> bool {
333    let active_key = format!("submodule.{submodule_name}.active");
334    if let Some(v) = config.get(&active_key) {
335        if let Ok(b) = config_parse_bool(v.trim()) {
336            return b;
337        }
338    }
339    let url_key = format!("submodule.{submodule_name}.url");
340    config.get(&url_key).is_some()
341}
342
/// Report whether `<super_git_dir>/modules` is a directory containing at least one entry
/// (Git `repo_has_absorbed_submodules`).
///
/// A missing directory, a non-directory at that path, or a failure to list it all count as
/// "no absorbed submodules".
pub fn repo_has_absorbed_submodules(super_git_dir: &Path) -> bool {
    let modules_dir = super_git_dir.join("modules");
    if !modules_dir.is_dir() {
        return false;
    }
    match fs::read_dir(&modules_dir) {
        Ok(mut entries) => entries.next().is_some(),
        Err(_) => false,
    }
}
351
352/// `.gitmodules` says at least one submodule exists (path+url), or absorbed modules exist.
353pub fn might_have_submodules_to_fetch(work_tree: &Path, super_git_dir: &Path) -> bool {
354    if work_tree.join(".gitmodules").exists() {
355        return true;
356    }
357    repo_has_absorbed_submodules(super_git_dir)
358}
359
360/// Open the git directory for a submodule at `rel_path` (work tree or `.git/modules/` fallback).
361pub fn submodule_git_dir_for_fetch(super_repo: &Repository, rel_path: &str) -> Option<PathBuf> {
362    let wt = super_repo.work_tree.as_ref()?;
363    let abs = wt.join(rel_path);
364    if abs.join(".git").exists() {
365        if abs.join(".git").is_file() {
366            let Ok(line) = fs::read_to_string(abs.join(".git")) else {
367                return None;
368            };
369            let line = line.trim();
370            let rest = line.strip_prefix("gitdir:")?.trim();
371            let gd = if Path::new(rest).is_absolute() {
372                PathBuf::from(rest)
373            } else {
374                abs.join(rest)
375            };
376            return fs::canonicalize(&gd).ok().or(Some(gd));
377        }
378        return Some(abs.join(".git"));
379    }
380    let modules = submodule_modules_git_dir(&super_repo.git_dir, rel_path);
381    if modules.join("HEAD").exists() {
382        return Some(modules);
383    }
384    None
385}