Skip to main content

grit_lib/
submodule_config_cache.rs

1//! Submodule configuration cache (Git `submodule-config.c` subset for test-tool).
2//!
3//! Parses `.gitmodules` blobs keyed by blob OID and supports lookup by path or
4//! logical submodule name, matching the behavior exercised by `t7411-submodule-config`.
5
6use std::collections::HashMap;
7use std::path::Path;
8
9use crate::config::{canonical_key, ConfigFile, ConfigScope};
10use crate::merge_diff::blob_oid_at_path;
11use crate::objects::{parse_commit, ObjectId, ObjectKind};
12use crate::odb::Odb;
13use crate::repo::Repository;
14use crate::rev_parse::resolve_revision;
15
/// Name/path pair describing one submodule, as reported in test output
/// (`Submodule name: 'x' for path 'y'`).
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct SubmoduleInfo {
    /// The submodule's logical name, i.e. the `<name>` in `submodule.<name>.*`.
    pub name: String,
    /// Where the submodule is checked out, relative to the superproject root.
    pub path: String,
}
24
/// Parsed state of a submodule's `fetchRecurseSubmodules` setting.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum FetchRecurse {
    /// Nothing recorded yet (the state a fresh `SubmoduleBuild` starts in).
    None,
    /// The value parsed as boolean true.
    On,
    /// The value parsed as boolean false.
    Off,
    /// The value was `on-demand`.
    OnDemand,
    /// The value could not be parsed and the caller asked not to fail.
    Error,
}
33
34#[derive(Clone, Debug)]
35struct SubmoduleBuild {
36    name: String,
37    path: Option<String>,
38    url: Option<String>,
39    fetch_recurse: FetchRecurse,
40}
41
42impl SubmoduleBuild {
43    fn new(name: String) -> Self {
44        Self {
45            name,
46            path: None,
47            url: None,
48            fetch_recurse: FetchRecurse::None,
49        }
50    }
51}
52
53/// Cache of parsed `.gitmodules` blobs (by blob OID) plus path/name indexes.
54#[derive(Default)]
55pub struct SubmoduleConfigCache {
56    by_blob: HashMap<ObjectId, Vec<SubmoduleBuild>>,
57    path_index: HashMap<(ObjectId, String), String>,
58    name_index: HashMap<(ObjectId, String), SubmoduleBuild>,
59}
60
61impl SubmoduleConfigCache {
62    /// Creates an empty cache.
63    #[must_use]
64    pub fn new() -> Self {
65        Self::default()
66    }
67
68    /// Looks up a submodule by checkout path for the given treeish (commit or tree OID).
69    ///
70    /// `treeish` is `None` for the worktree / index / `HEAD` `.gitmodules` layer (Git null OID).
71    pub fn submodule_from_path(
72        &mut self,
73        repo: &Repository,
74        treeish: Option<(ObjectId, ObjectId)>,
75        path: &str,
76    ) -> Result<Option<SubmoduleInfo>, String> {
77        let gm_oid = self.gitmodules_oid_for_treeish(repo, treeish)?;
78        let gm_oid = match gm_oid {
79            Some(o) => o,
80            None => return Ok(None),
81        };
82        self.ensure_blob_parsed(repo, treeish, gm_oid)?;
83        let key_path = norm_path_key(path);
84        let name = self
85            .path_index
86            .get(&(gm_oid, key_path.clone()))
87            .cloned()
88            .or_else(|| self.path_index.get(&(gm_oid, path.to_string())).cloned());
89        let Some(name) = name else {
90            return Ok(None);
91        };
92        let path_out = self
93            .name_index
94            .get(&(gm_oid, name.clone()))
95            .and_then(|b| b.path.clone())
96            .unwrap_or_else(|| path.to_string());
97        Ok(Some(SubmoduleInfo {
98            name,
99            path: path_out,
100        }))
101    }
102
103    /// Looks up a submodule by its logical `.gitmodules` name.
104    pub fn submodule_from_name(
105        &mut self,
106        repo: &Repository,
107        treeish: Option<(ObjectId, ObjectId)>,
108        name: &str,
109    ) -> Result<Option<SubmoduleInfo>, String> {
110        let gm_oid = self.gitmodules_oid_for_treeish(repo, treeish)?;
111        let gm_oid = match gm_oid {
112            Some(o) => o,
113            None => return Ok(None),
114        };
115        self.ensure_blob_parsed(repo, treeish, gm_oid)?;
116        let b = self.name_index.get(&(gm_oid, name.to_string())).cloned();
117        let Some(b) = b else {
118            return Ok(None);
119        };
120        let Some(path) = b.path.clone() else {
121            return Ok(None);
122        };
123        Ok(Some(SubmoduleInfo { name: b.name, path }))
124    }
125
126    fn gitmodules_oid_for_treeish(
127        &self,
128        repo: &Repository,
129        treeish: Option<(ObjectId, ObjectId)>,
130    ) -> Result<Option<ObjectId>, String> {
131        let Some((_rev, tree_oid)) = treeish else {
132            return self.gitmodules_oid_worktree_index_head(repo);
133        };
134        Ok(blob_oid_at_path(&repo.odb, &tree_oid, ".gitmodules"))
135    }
136
137    /// Where to read `.gitmodules` for null treeish: disk, else index blob, else `HEAD` tree (Git `config_from_gitmodules`).
138    fn gitmodules_oid_worktree_index_head(
139        &self,
140        repo: &Repository,
141    ) -> Result<Option<ObjectId>, String> {
142        let Some(wt) = repo.work_tree.as_ref() else {
143            return Ok(None);
144        };
145        if wt.join(".gitmodules").exists() {
146            return Ok(Some(ObjectId::zero()));
147        }
148        let index = repo.load_index().map_err(|e| e.to_string())?;
149        if let Some(ie) = index.get(b".gitmodules", 0) {
150            return Ok(Some(ie.oid));
151        }
152        let head_oid = crate::state::resolve_head(&repo.git_dir)
153            .map_err(|e| e.to_string())?
154            .oid()
155            .copied();
156        let Some(commit_oid) = head_oid else {
157            return Ok(None);
158        };
159        let obj = repo.odb.read(&commit_oid).map_err(|e| e.to_string())?;
160        if obj.kind != ObjectKind::Commit {
161            return Ok(None);
162        }
163        let commit = parse_commit(&obj.data).map_err(|e| e.to_string())?;
164        Ok(blob_oid_at_path(&repo.odb, &commit.tree, ".gitmodules"))
165    }
166
167    fn ensure_blob_parsed(
168        &mut self,
169        repo: &Repository,
170        treeish: Option<(ObjectId, ObjectId)>,
171        gitmodules_blob: ObjectId,
172    ) -> Result<(), String> {
173        if self.by_blob.contains_key(&gitmodules_blob) {
174            return Ok(());
175        }
176        if gitmodules_blob.is_zero() {
177            let Some(wt) = repo.work_tree.as_ref() else {
178                self.by_blob.insert(gitmodules_blob, Vec::new());
179                return Ok(());
180            };
181            let path = wt.join(".gitmodules");
182            let text = if path.exists() {
183                std::fs::read_to_string(&path).map_err(|e| e.to_string())?
184            } else {
185                let index = repo.load_index().map_err(|e| e.to_string())?;
186                if let Some(ie) = index.get(b".gitmodules", 0) {
187                    let obj = repo.odb.read(&ie.oid).map_err(|e| e.to_string())?;
188                    if obj.kind != ObjectKind::Blob {
189                        self.by_blob.insert(gitmodules_blob, Vec::new());
190                        return Ok(());
191                    }
192                    String::from_utf8(obj.data).map_err(|e| e.to_string())?
193                } else {
194                    let head_oid = crate::state::resolve_head(&repo.git_dir)
195                        .ok()
196                        .and_then(|h| h.oid().copied());
197                    let Some(commit_oid) = head_oid else {
198                        self.by_blob.insert(gitmodules_blob, Vec::new());
199                        return Ok(());
200                    };
201                    let obj = repo.odb.read(&commit_oid).map_err(|e| e.to_string())?;
202                    if obj.kind != ObjectKind::Commit {
203                        self.by_blob.insert(gitmodules_blob, Vec::new());
204                        return Ok(());
205                    }
206                    let commit = parse_commit(&obj.data).map_err(|e| e.to_string())?;
207                    let Some(blob_oid) = blob_oid_at_path(&repo.odb, &commit.tree, ".gitmodules")
208                    else {
209                        self.by_blob.insert(gitmodules_blob, Vec::new());
210                        return Ok(());
211                    };
212                    let blob = repo.odb.read(&blob_oid).map_err(|e| e.to_string())?;
213                    if blob.kind != ObjectKind::Blob {
214                        self.by_blob.insert(gitmodules_blob, Vec::new());
215                        return Ok(());
216                    }
217                    String::from_utf8(blob.data).map_err(|e| e.to_string())?
218                }
219            };
220            self.ingest_gitmodules_blob(repo, None, None, ObjectId::zero(), &text, true)?;
221            return Ok(());
222        }
223        let obj = repo
224            .odb
225            .read(&gitmodules_blob)
226            .map_err(|e| format!("failed to read .gitmodules blob: {e}"))?;
227        if obj.kind != ObjectKind::Blob {
228            self.by_blob.insert(gitmodules_blob, Vec::new());
229            return Ok(());
230        }
231        let text = String::from_utf8(obj.data).map_err(|e| e.to_string())?;
232        let commit_for_warn = treeish.map(|(rev, _)| rev).filter(|o| !o.is_zero());
233        self.ingest_gitmodules_blob(
234            repo,
235            commit_for_warn,
236            treeish.map(|(rev, _)| rev),
237            gitmodules_blob,
238            &text,
239            false,
240        )?;
241        Ok(())
242    }
243
244    fn ingest_gitmodules_blob(
245        &mut self,
246        repo: &Repository,
247        treeish_for_warning: Option<ObjectId>,
248        treeish_for_blob_spec: Option<ObjectId>,
249        gitmodules_blob: ObjectId,
250        content: &str,
251        die_on_bad_fetch_recurse: bool,
252    ) -> Result<(), String> {
253        if self.by_blob.contains_key(&gitmodules_blob) {
254            return Ok(());
255        }
256
257        let (git_entries, bad_line) = ConfigFile::parse_gitmodules_best_effort(
258            Path::new(".gitmodules"),
259            content,
260            ConfigScope::Local,
261        );
262        if let Some(line) = bad_line {
263            eprintln!(
264                "{}",
265                gitmodules_config_error(
266                    repo,
267                    treeish_for_blob_spec,
268                    gitmodules_blob,
269                    line,
270                    "bad config",
271                )
272            );
273        }
274
275        let mut by_name: HashMap<String, SubmoduleBuild> = HashMap::new();
276
277        for ent in &git_entries {
278            let Some((name, var)) = submodule_name_and_var(&ent.key) else {
279                continue;
280            };
281            if !check_submodule_name_ok(&name) {
282                eprintln!("warning: ignoring suspicious submodule name: {name}");
283                continue;
284            }
285            let entry = by_name
286                .entry(name.clone())
287                .or_insert_with(|| SubmoduleBuild::new(name.clone()));
288
289            match var.as_str() {
290                "path" => {
291                    let Some(value) = ent.value.as_deref() else {
292                        return Err(gitmodules_config_error(
293                            repo,
294                            treeish_for_blob_spec,
295                            gitmodules_blob,
296                            ent.line,
297                            "bad config",
298                        ));
299                    };
300                    if crate::gitmodules::looks_like_command_line_option(value) {
301                        eprintln!(
302                            "warning: ignoring '{}' which may be interpreted as a command-line option: {value}",
303                            ent.key
304                        );
305                        continue;
306                    }
307                    let overwrite = gitmodules_blob.is_zero();
308                    if entry.path.is_some() && !overwrite {
309                        warn_multiple_config(treeish_for_warning, &entry.name, "path");
310                    } else {
311                        if let Some(old) = &entry.path {
312                            self.path_index_remove(gitmodules_blob, old);
313                        }
314                        entry.path = Some(value.to_string());
315                        self.path_index_insert(gitmodules_blob, value, entry.name.clone());
316                    }
317                }
318                "url" => {
319                    let Some(value) = ent.value.as_deref() else {
320                        return Err(gitmodules_config_error(
321                            repo,
322                            treeish_for_blob_spec,
323                            gitmodules_blob,
324                            ent.line,
325                            "bad config",
326                        ));
327                    };
328                    if crate::gitmodules::looks_like_command_line_option(value) {
329                        eprintln!(
330                            "warning: ignoring '{}' which may be interpreted as a command-line option: {value}",
331                            ent.key
332                        );
333                        continue;
334                    }
335                    let overwrite = gitmodules_blob.is_zero();
336                    if entry.url.is_some() && !overwrite {
337                        warn_multiple_config(treeish_for_warning, &entry.name, "url");
338                    } else {
339                        entry.url = Some(value.to_string());
340                    }
341                }
342                "fetchrecursesubmodules" => {
343                    let value = ent.value.as_deref().unwrap_or("");
344                    let parsed = parse_fetch_recurse(value, die_on_bad_fetch_recurse);
345                    let parsed = parsed?;
346                    let overwrite = gitmodules_blob.is_zero();
347                    if entry.fetch_recurse != FetchRecurse::None && !overwrite {
348                        warn_multiple_config(
349                            treeish_for_warning,
350                            &entry.name,
351                            "fetchrecursesubmodules",
352                        );
353                    } else {
354                        entry.fetch_recurse = parsed;
355                    }
356                }
357                "ignore" => {
358                    let Some(value) = ent.value.as_deref() else {
359                        return Err(gitmodules_config_error(
360                            repo,
361                            treeish_for_blob_spec,
362                            gitmodules_blob,
363                            ent.line,
364                            "bad config",
365                        ));
366                    };
367                    let _ = value;
368                }
369                "branch" => {
370                    let Some(_value) = ent.value.as_deref() else {
371                        return Err(gitmodules_config_error(
372                            repo,
373                            treeish_for_blob_spec,
374                            gitmodules_blob,
375                            ent.line,
376                            "bad config",
377                        ));
378                    };
379                }
380                "update" | "shallow" => {}
381                _ => {}
382            }
383        }
384
385        let list: Vec<SubmoduleBuild> = by_name.into_values().collect();
386        for b in &list {
387            self.name_index
388                .insert((gitmodules_blob, b.name.clone()), b.clone());
389        }
390        self.by_blob.insert(gitmodules_blob, list);
391        Ok(())
392    }
393
394    fn path_index_insert(&mut self, blob: ObjectId, path: &str, name: String) {
395        let key = norm_path_key(path);
396        self.path_index.insert((blob, key), name);
397    }
398
399    fn path_index_remove(&mut self, blob: ObjectId, path: &str) {
400        let key = norm_path_key(path);
401        self.path_index.remove(&(blob, key));
402    }
403
404    /// Prints all values for `key` (canonical submodule config key) from the nested
405    /// submodule repository at `super_path` / `submodule_path`.
406    pub fn print_config_from_nested_gitmodules(
407        _super_repo: &Repository,
408        super_work_tree: &Path,
409        submodule_path: &str,
410        key: &str,
411    ) -> Result<(), String> {
412        let wanted = canonical_key(key).map_err(|e| e.to_string())?;
413        let sub_work = super_work_tree.join(submodule_path);
414        let sub_git = if sub_work.join(".git").is_file() {
415            let gf = std::fs::read_to_string(sub_work.join(".git"))
416                .map_err(|e| format!("read gitfile: {e}"))?;
417            let line = gf.lines().next().unwrap_or("").trim();
418            let Some(rest) = line.strip_prefix("gitdir:") else {
419                return Err("invalid gitfile".into());
420            };
421            let rest = rest.trim();
422            let p = Path::new(rest);
423            if p.is_absolute() {
424                p.to_path_buf()
425            } else {
426                sub_work.join(rest)
427            }
428        } else {
429            sub_work.join(".git")
430        };
431        let sub_repo = Repository::open(&sub_git, Some(&sub_work))
432            .map_err(|e| format!("open submodule repo: {e}"))?;
433
434        let gm_path = sub_work.join(".gitmodules");
435        let (content, _) = if gm_path.exists() {
436            let c = std::fs::read_to_string(&gm_path).map_err(|e| e.to_string())?;
437            (c, gm_path)
438        } else {
439            let index = sub_repo.load_index().map_err(|e| e.to_string())?;
440            if let Some(ie) = index.get(b".gitmodules", 0) {
441                let obj = sub_repo.odb.read(&ie.oid).map_err(|e| e.to_string())?;
442                if obj.kind != ObjectKind::Blob {
443                    return Ok(());
444                }
445                let c = String::from_utf8(obj.data).map_err(|e| e.to_string())?;
446                (c, gm_path)
447            } else {
448                let head_oid = crate::state::resolve_head(&sub_repo.git_dir)
449                    .ok()
450                    .and_then(|h| h.oid().copied());
451                let Some(commit_oid) = head_oid else {
452                    return Ok(());
453                };
454                let obj = sub_repo.odb.read(&commit_oid).map_err(|e| e.to_string())?;
455                if obj.kind != ObjectKind::Commit {
456                    return Ok(());
457                }
458                let commit = parse_commit(&obj.data).map_err(|e| e.to_string())?;
459                let Some(blob_oid) = blob_oid_at_path(&sub_repo.odb, &commit.tree, ".gitmodules")
460                else {
461                    return Ok(());
462                };
463                let blob = sub_repo.odb.read(&blob_oid).map_err(|e| e.to_string())?;
464                if blob.kind != ObjectKind::Blob {
465                    return Ok(());
466                }
467                let c = String::from_utf8(blob.data).map_err(|e| e.to_string())?;
468                (c, gm_path)
469            }
470        };
471
472        let cfg = ConfigFile::parse(Path::new(".gitmodules"), &content, ConfigScope::Local)
473            .map_err(|e| e.to_string())?;
474        for e in &cfg.entries {
475            if e.key == wanted {
476                if let Some(v) = &e.value {
477                    println!("{v}");
478                }
479            }
480        }
481        Ok(())
482    }
483}
484
/// Builds the path-index key for `path` by turning every backslash into a forward slash.
fn norm_path_key(path: &str) -> String {
    path.chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
488
489fn warn_multiple_config(treeish: Option<ObjectId>, name: &str, option: &str) {
490    let commit_string = treeish
491        .map(|o| o.to_hex())
492        .unwrap_or_else(|| "WORKTREE".to_string());
493    eprintln!(
494        "warning: {commit_string}:.gitmodules, multiple configurations found for \
495'submodule.{name}.{option}'. Skipping second one!"
496    );
497}
498
499fn gitmodules_config_error(
500    repo: &Repository,
501    treeish_for_blob: Option<ObjectId>,
502    gitmodules_blob: ObjectId,
503    line: usize,
504    msg: &str,
505) -> String {
506    if gitmodules_blob.is_zero() {
507        format!("{msg} line {line} in file .gitmodules")
508    } else {
509        let spec = submodule_blob_spec(repo, treeish_for_blob, gitmodules_blob);
510        format!("{msg} line {line} in submodule-blob {spec}")
511    }
512}
513
514/// Git names submodule-blob config sources as `<commit>:.gitmodules` (see `gitmodule_oid_from_commit`).
515fn submodule_blob_spec(
516    repo: &Repository,
517    treeish_for_blob: Option<ObjectId>,
518    blob: ObjectId,
519) -> String {
520    let fallback = format!("{}:.gitmodules", blob.to_hex());
521    let Some(treeish) = treeish_for_blob else {
522        return fallback;
523    };
524    let Ok(obj) = repo.odb.read(&treeish) else {
525        return fallback;
526    };
527    let commit_oid = match obj.kind {
528        ObjectKind::Commit => treeish,
529        ObjectKind::Tree => {
530            let Ok(c) = find_commit_containing_tree(repo, treeish) else {
531                return fallback;
532            };
533            c
534        }
535        _ => return fallback,
536    };
537    format!("{}:.gitmodules", commit_oid.to_hex())
538}
539
540fn find_commit_containing_tree(repo: &Repository, tree_oid: ObjectId) -> Result<ObjectId, ()> {
541    let mut stack = vec![format!("HEAD^{{commit}}")];
542    for name in ["HEAD", "refs/heads/master", "refs/heads/main"] {
543        stack.push(name.to_string());
544    }
545    for spec in stack {
546        let Ok(oid) = resolve_revision(repo, spec.as_str()) else {
547            continue;
548        };
549        let Ok(obj) = repo.odb.read(&oid) else {
550            continue;
551        };
552        if obj.kind != ObjectKind::Commit {
553            continue;
554        }
555        let Ok(c) = parse_commit(&obj.data) else {
556            continue;
557        };
558        if tree_contains_oid(&repo.odb, c.tree, tree_oid)? {
559            return Ok(oid);
560        }
561    }
562    Err(())
563}
564
565fn tree_contains_oid(odb: &Odb, tree: ObjectId, target: ObjectId) -> Result<bool, ()> {
566    let obj = odb.read(&tree).map_err(|_| ())?;
567    if obj.kind != ObjectKind::Tree {
568        return Ok(false);
569    }
570    let entries = crate::objects::parse_tree(&obj.data).map_err(|_| ())?;
571    for e in entries {
572        if e.oid == target {
573            return Ok(true);
574        }
575        if e.mode == 0o040000 && tree_contains_oid(odb, e.oid, target)? {
576            return Ok(true);
577        }
578    }
579    Ok(false)
580}
581
/// Splits a `submodule.<name>.<var>` key into `(name, var)`.
///
/// The variable is everything after the last dot, so the name itself may contain dots.
/// Returns `None` when the `submodule.` prefix is missing, when no dot follows it, or
/// when the name part is empty.
fn submodule_name_and_var(key: &str) -> Option<(String, String)> {
    let (name, var) = key.strip_prefix("submodule.")?.rsplit_once('.')?;
    if name.is_empty() {
        None
    } else {
        Some((name.to_string(), var.to_string()))
    }
}
592
/// Accepts a submodule name unless it is empty or has `..` as a path component.
///
/// Both `/` and `\` count as component separators regardless of platform.
fn check_submodule_name_ok(name: &str) -> bool {
    let is_separator = |c: char| c == '/' || c == '\\';
    !name.is_empty() && name.split(is_separator).all(|component| component != "..")
}
622
623fn parse_fetch_recurse(value: &str, die_on_error: bool) -> Result<FetchRecurse, String> {
624    let v = value.trim();
625    match crate::config::parse_bool(v) {
626        Ok(true) => return Ok(FetchRecurse::On),
627        Ok(false) => return Ok(FetchRecurse::Off),
628        Err(_) => {}
629    }
630    if v.eq_ignore_ascii_case("on-demand") {
631        return Ok(FetchRecurse::OnDemand);
632    }
633    if die_on_error {
634        Err(format!(
635            "fatal: bad submodule.fetchRecurseSubmodules argument: '{v}'"
636        ))
637    } else {
638        Ok(FetchRecurse::Error)
639    }
640}