Skip to main content

grit_lib/
gitmodules.rs

1//! `.gitmodules` validation (Git `fsck` / `submodule-config` parity).
2//!
3//! Submodule `path` and `url` values must not look like command-line options
4//! (non-empty and starting with `-`). See Git's `looks_like_command_line_option` in `path.c`.
5//!
6//! Submodule name and URL rules mirror Git's `submodule-config.c` (`check_submodule_name`,
7//! `check_submodule_url`).
8
9use std::collections::{HashMap, HashSet};
10use std::io::Write;
11use std::path::Path;
12use std::path::PathBuf;
13
14use crate::config::{ConfigFile, ConfigScope};
15use crate::error::Result;
16use crate::objects::{parse_commit, parse_tree, ObjectId, ObjectKind, TreeEntry};
17use crate::odb::Odb;
18use crate::pack::read_pack_index;
19use url::{Host, Url};
20
/// Reports whether `s` could be mistaken for a command-line option, i.e. whether it begins
/// with `-` (Git `looks_like_command_line_option`). An empty string never qualifies.
#[must_use]
pub fn looks_like_command_line_option(s: &str) -> bool {
    s.starts_with('-')
}
26
27/// True when `name` names a `.gitmodules` file (HFS / NTFS spellings), not a symlink.
28#[must_use]
29pub fn tree_entry_is_gitmodules_blob(mode: u32, name: &[u8]) -> bool {
30    if mode == 0o120000 {
31        return false;
32    }
33    let Ok(name_str) = std::str::from_utf8(name) else {
34        return false;
35    };
36    is_hfs_dot_gitmodules(name_str) || is_ntfs_dot_gitmodules(name_str)
37}
38
/// Yields the next character of `chars` that is not one of the code points skipped by the
/// HFS+ comparison (U+200C..U+200F, U+202A..U+202E, U+206A..U+206F and U+FEFF), or `None`
/// once the iterator is exhausted.
fn next_hfs_char(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<char> {
    chars.find(|&ch| {
        !matches!(
            ch,
            '\u{200c}'..='\u{200f}'
                | '\u{202a}'..='\u{202e}'
                | '\u{206a}'..='\u{206f}'
                | '\u{feff}'
        )
    })
}
51
52fn is_hfs_dot_generic(path: &str, needle: &str) -> bool {
53    let mut chars = path.chars().peekable();
54    let mut c = match next_hfs_char(&mut chars) {
55        Some(x) => x,
56        None => return false,
57    };
58    if c != '.' {
59        return false;
60    }
61    for nc in needle.chars() {
62        c = match next_hfs_char(&mut chars) {
63            Some(x) => x,
64            None => return false,
65        };
66        if c as u32 > 127 {
67            return false;
68        }
69        if !c.eq_ignore_ascii_case(&nc) {
70            return false;
71        }
72    }
73    match next_hfs_char(&mut chars) {
74        None => true,
75        Some(ch) if ch == '/' => true,
76        Some(_) => false,
77    }
78}
79
/// HFS+ check for `.gitmodules`: calls [`is_hfs_dot_generic`] with the needle `gitmodules`.
fn is_hfs_dot_gitmodules(path: &str) -> bool {
    is_hfs_dot_generic(path, "gitmodules")
}
83
/// Returns `true` when every byte of `name` from offset `i` up to the end, the first `:` or
/// the first NUL byte is a space or a period.
///
/// An offset at or past the end of `name` counts as "nothing left", so the result is `true`.
fn only_spaces_and_periods(name: &str, i: usize) -> bool {
    for &c in name.as_bytes().get(i..).unwrap_or(&[]) {
        match c {
            // NUL or `:` ends the part of the name that is inspected.
            0 | b':' => return true,
            b' ' | b'.' => {}
            _ => return false,
        }
    }
    true
}

/// NTFS-style match of `name` against a dot-file (Git `is_ntfs_dot_generic`).
///
/// `name` matches when it is one of:
/// - `.` followed by `dotgit_name` (ASCII case-insensitive),
/// - the first six bytes of `dotgit_name` followed by `~1` .. `~4`,
/// - the fallback short name: a leading part of `short_prefix` (at most six bytes, compared
///   lowercased), then `~`, a digit `1`..`9` and further digits, filling eight bytes,
///
/// and in every case the rest of the name consists only of spaces and periods (up to an
/// optional `:`), as decided by [`only_spaces_and_periods`].
///
/// All comparisons are done on bytes. Entry names come from untrusted trees, and slicing
/// the `&str` at fixed byte offsets would panic whenever an offset falls inside a
/// multi-byte UTF-8 character (for example `.gitmodule\u{e9}`).
fn is_ntfs_dot_generic(name: &str, dotgit_name: &str, short_prefix: &str) -> bool {
    let b = name.as_bytes();
    let needle = dotgit_name.as_bytes();
    let prefix = short_prefix.as_bytes();
    let len = needle.len();

    // Long form: ".<dotgit_name>" followed only by spaces/periods.
    if b.first() == Some(&b'.') && b.len() > len && b[1..=len].eq_ignore_ascii_case(needle) {
        return only_spaces_and_periods(name, len + 1);
    }

    // Regular short name: six bytes of the needle, then "~1" .. "~4".
    if b.len() >= 8
        && needle.len() >= 6
        && b[..6].eq_ignore_ascii_case(&needle[..6])
        && b[6] == b'~'
        && (b'1'..=b'4').contains(&b[7])
    {
        return only_spaces_and_periods(name, 8);
    }

    // Fallback short name: walk the first eight bytes.
    let mut i = 0usize;
    let mut saw_tilde = false;
    while i < 8 {
        let c = b.get(i).copied().unwrap_or(0);
        if c == 0 {
            return false;
        }
        if saw_tilde {
            if !c.is_ascii_digit() {
                return false;
            }
        } else if c == b'~' {
            // The byte right after the tilde must be a non-zero digit.
            i += 1;
            let d = b.get(i).copied().unwrap_or(0);
            if !(b'1'..=b'9').contains(&d) {
                return false;
            }
            saw_tilde = true;
        } else if i >= 6 || !c.is_ascii() {
            // No tilde within the first six bytes, or a non-ASCII byte: cannot match.
            return false;
        } else if c.to_ascii_lowercase() != prefix.get(i).copied().unwrap_or(0) {
            return false;
        }
        i += 1;
    }
    only_spaces_and_periods(name, i)
}
150
/// NTFS check for `.gitmodules`: calls [`is_ntfs_dot_generic`] with the name `gitmodules`
/// and the fallback short-name prefix `gi7eba`.
fn is_ntfs_dot_gitmodules(name: &str) -> bool {
    is_ntfs_dot_generic(name, "gitmodules", "gi7eba")
}
154
/// HFS+ check for `.gitattributes`: calls [`is_hfs_dot_generic`] with the needle
/// `gitattributes`.
fn is_hfs_dot_gitattributes(path: &str) -> bool {
    is_hfs_dot_generic(path, "gitattributes")
}
158
/// NTFS check for `.gitattributes`: calls [`is_ntfs_dot_generic`] with the name
/// `gitattributes` and the fallback short-name prefix `gi7d29`.
fn is_ntfs_dot_gitattributes(name: &str) -> bool {
    is_ntfs_dot_generic(name, "gitattributes", "gi7d29")
}
162
/// HFS+ check for `.gitignore`: calls [`is_hfs_dot_generic`] with the needle `gitignore`.
fn is_hfs_dot_gitignore(path: &str) -> bool {
    is_hfs_dot_generic(path, "gitignore")
}
166
/// NTFS check for `.gitignore`: calls [`is_ntfs_dot_generic`] with the name `gitignore`
/// and the fallback short-name prefix `gi250a`.
fn is_ntfs_dot_gitignore(name: &str) -> bool {
    is_ntfs_dot_generic(name, "gitignore", "gi250a")
}
170
/// HFS+ check for `.mailmap`: calls [`is_hfs_dot_generic`] with the needle `mailmap`.
fn is_hfs_dot_mailmap(path: &str) -> bool {
    is_hfs_dot_generic(path, "mailmap")
}
174
/// NTFS check for `.mailmap`: calls [`is_ntfs_dot_generic`] with the name `mailmap` and the
/// fallback short-name prefix `maba30`.
fn is_ntfs_dot_mailmap(name: &str) -> bool {
    is_ntfs_dot_generic(name, "mailmap", "maba30")
}
178
179/// True for a tree entry name that should be treated as `.gitattributes` for fsck (blob only).
180#[must_use]
181pub fn tree_entry_is_gitattributes_blob(mode: u32, name: &[u8]) -> bool {
182    if mode == 0o120000 {
183        return false;
184    }
185    let Ok(name_str) = std::str::from_utf8(name) else {
186        return false;
187    };
188    is_hfs_dot_gitattributes(name_str) || is_ntfs_dot_gitattributes(name_str)
189}
190
/// `true` when `name` matches either the HFS+ or the NTFS spelling of `.gitmodules`.
fn is_hfs_or_ntfs_dot_gitmodules(name: &str) -> bool {
    is_hfs_dot_gitmodules(name) || is_ntfs_dot_gitmodules(name)
}
194
/// `true` when `name` matches either the HFS+ or the NTFS spelling of `.gitattributes`.
fn is_hfs_or_ntfs_dot_gitattributes(name: &str) -> bool {
    is_hfs_dot_gitattributes(name) || is_ntfs_dot_gitattributes(name)
}
198
199/// Symlink and registration for one tree (Git `fsck_tree` entry loop).
200pub fn fsck_dot_special_tree_pass(
201    tree_oid: &ObjectId,
202    data: &[u8],
203    gitmodules_out: &mut HashSet<ObjectId>,
204    gitattributes_out: &mut HashSet<ObjectId>,
205) -> Result<Vec<DotFsckIssue>> {
206    let entries = parse_tree(data)?;
207    let mut issues = Vec::new();
208    for TreeEntry { mode, name, oid } in entries {
209        let Ok(name_str) = std::str::from_utf8(&name) else {
210            continue;
211        };
212        let is_symlink = mode == 0o120000;
213
214        if is_hfs_or_ntfs_dot_gitmodules(name_str) {
215            if is_symlink {
216                issues.push(DotFsckIssue::TreeSymlink {
217                    tree_oid: *tree_oid,
218                    id: "gitmodulesSymlink",
219                    detail: ".gitmodules is a symbolic link",
220                });
221            } else {
222                gitmodules_out.insert(oid);
223            }
224        }
225
226        if is_hfs_or_ntfs_dot_gitattributes(name_str) {
227            if is_symlink {
228                issues.push(DotFsckIssue::TreeSymlink {
229                    tree_oid: *tree_oid,
230                    id: "gitattributesSymlink",
231                    detail: ".gitattributes is a symlink",
232                });
233            } else {
234                gitattributes_out.insert(oid);
235            }
236        }
237
238        if is_symlink {
239            if is_hfs_dot_gitignore(name_str) || is_ntfs_dot_gitignore(name_str) {
240                issues.push(DotFsckIssue::TreeSymlink {
241                    tree_oid: *tree_oid,
242                    id: "gitignoreSymlink",
243                    detail: ".gitignore is a symlink",
244                });
245            }
246            if is_hfs_dot_mailmap(name_str) || is_ntfs_dot_mailmap(name_str) {
247                issues.push(DotFsckIssue::TreeSymlink {
248                    tree_oid: *tree_oid,
249                    id: "mailmapSymlink",
250                    detail: ".mailmap is a symlink",
251                });
252            }
253        }
254
255        let mut slash_rest = name_str;
256        while let Some(idx) = slash_rest.find('\\') {
257            let after = &slash_rest[idx + 1..];
258            if is_ntfs_dot_gitmodules(after) {
259                if is_symlink {
260                    issues.push(DotFsckIssue::TreeSymlink {
261                        tree_oid: *tree_oid,
262                        id: "gitmodulesSymlink",
263                        detail: ".gitmodules is a symbolic link",
264                    });
265                } else {
266                    gitmodules_out.insert(oid);
267                }
268            }
269            slash_rest = after;
270        }
271    }
272    Ok(issues)
273}
274
/// Problems reported while walking trees / blobs for `.gitmodules` / `.gitattributes` fsck.
///
/// In every variant `id` is the fsck message id (for example `gitmodulesSymlink`) and
/// `detail` is the human-readable text; see [`DotFsckIssue::format_line`] for rendering.
#[derive(Debug, Clone)]
pub enum DotFsckIssue {
    /// A dot-file name (`.gitmodules`, `.gitattributes`, `.gitignore`, `.mailmap`) was found
    /// on a symlink entry of the tree `tree_oid`.
    TreeSymlink {
        tree_oid: ObjectId,
        id: &'static str,
        detail: &'static str,
    },
    /// An object registered as `.gitmodules` / `.gitattributes` turned out not to be a blob;
    /// `kind` is the actual object kind.
    NonBlobDotFile {
        oid: ObjectId,
        kind: ObjectKind,
        id: &'static str,
        detail: &'static str,
    },
    /// A `.gitmodules` blob failed a content check or could not be parsed.
    BlobGitmodules {
        blob_oid: ObjectId,
        id: &'static str,
        detail: String,
    },
    /// A `.gitattributes` blob exceeded a size or line-length limit.
    BlobGitattributes {
        blob_oid: ObjectId,
        id: &'static str,
        detail: &'static str,
    },
}
300
301impl DotFsckIssue {
302    /// Single-line diagnostic matching `git fsck` (`error in tree` / `warning in blob`, etc.).
303    #[must_use]
304    pub fn format_line(&self) -> String {
305        match self {
306            DotFsckIssue::TreeSymlink {
307                tree_oid,
308                id,
309                detail,
310            } => {
311                let prefix = if *id == "gitmodulesSymlink" {
312                    "error"
313                } else {
314                    "warning"
315                };
316                format!("{prefix} in tree {}: {}: {}", tree_oid.to_hex(), id, detail)
317            }
318            DotFsckIssue::NonBlobDotFile {
319                oid,
320                kind,
321                id,
322                detail,
323            } => format!(
324                "error in {} {}: {}: {}",
325                kind.as_str(),
326                oid.to_hex(),
327                id,
328                detail
329            ),
330            DotFsckIssue::BlobGitmodules {
331                blob_oid,
332                id,
333                detail,
334            } => {
335                let prefix = if *id == "gitmodulesParse" {
336                    "warning"
337                } else {
338                    "error"
339                };
340                format!("{prefix} in blob {}: {}: {}", blob_oid.to_hex(), id, detail)
341            }
342            DotFsckIssue::BlobGitattributes {
343                blob_oid,
344                id,
345                detail,
346            } => format!("error in blob {}: {}: {}", blob_oid.to_hex(), id, detail),
347        }
348    }
349
350    /// `true` when this fsck message is fatal by default (Git treats `gitmodulesParse` as INFO).
351    #[must_use]
352    pub fn is_error_severity(&self) -> bool {
353        !matches!(
354            self,
355            DotFsckIssue::BlobGitmodules {
356                id: "gitmodulesParse",
357                ..
358            } | DotFsckIssue::TreeSymlink {
359                id: "gitattributesSymlink" | "gitignoreSymlink" | "mailmapSymlink",
360                ..
361            }
362        )
363    }
364}
365
366/// True when raw `.gitmodules` bytes cannot be parsed as Git config (Git `git_config_from_mem` failure).
367fn gitmodules_blob_unparseable(data: &[u8]) -> bool {
368    for raw in data.split(|b| *b == b'\n') {
369        let line = trim_bytes(raw);
370        if line.is_empty() || line[0] == b'#' || line[0] == b';' {
371            continue;
372        }
373        if line.first() == Some(&b'[') && !line.contains(&b']') {
374            return true;
375        }
376    }
377    false
378}
379
/// Strips leading spaces/tabs and trailing spaces/tabs/carriage returns from `s`.
///
/// A leading `\r` is not removed by the front trim. It only disappears when the trailing
/// trim reaches it.
fn trim_bytes(s: &[u8]) -> &[u8] {
    let start = s
        .iter()
        .position(|&byte| byte != b' ' && byte != b'\t')
        .unwrap_or(s.len());
    let tail = &s[start..];
    let end = tail
        .iter()
        .rposition(|&byte| !matches!(byte, b' ' | b'\t' | b'\r'))
        .map_or(0, |last| last + 1);
    &tail[..end]
}
397
398/// Content checks for OIDs registered as `.gitmodules` / `.gitattributes` targets (Git `fsck_blob` / `fsck_blobs`).
399pub fn fsck_dot_special_object(
400    oid: &ObjectId,
401    kind: ObjectKind,
402    data: &[u8],
403    gitmodules_oids: &HashSet<ObjectId>,
404    gitattributes_oids: &HashSet<ObjectId>,
405) -> Vec<DotFsckIssue> {
406    let mut out = Vec::new();
407    if gitmodules_oids.contains(oid) {
408        if kind != ObjectKind::Blob {
409            out.push(DotFsckIssue::NonBlobDotFile {
410                oid: *oid,
411                kind,
412                id: "gitmodulesBlob",
413                detail: "non-blob found at .gitmodules",
414            });
415            return out;
416        }
417        if let Some(msg) = validate_gitmodules_blob_line(data) {
418            let (id, detail) = split_fsck_colon(&msg);
419            out.push(DotFsckIssue::BlobGitmodules {
420                blob_oid: *oid,
421                id,
422                detail: detail.to_string(),
423            });
424        } else {
425            let text = std::str::from_utf8(data).unwrap_or("");
426            let strict_bad =
427                ConfigFile::parse(Path::new(".gitmodules"), text, ConfigScope::Local).is_err();
428            if strict_bad || gitmodules_blob_unparseable(data) {
429                out.push(DotFsckIssue::BlobGitmodules {
430                    blob_oid: *oid,
431                    id: "gitmodulesParse",
432                    detail: "could not parse gitmodules blob".to_string(),
433                });
434            }
435        }
436    }
437    if gitattributes_oids.contains(oid) {
438        if kind != ObjectKind::Blob {
439            out.push(DotFsckIssue::NonBlobDotFile {
440                oid: *oid,
441                kind,
442                id: "gitattributesBlob",
443                detail: "non-blob found at .gitattributes",
444            });
445            return out;
446        }
447        if data.len() > ATTR_MAX_FILE_SIZE {
448            out.push(DotFsckIssue::BlobGitattributes {
449                blob_oid: *oid,
450                id: "gitattributesLarge",
451                detail: ".gitattributes too large to parse",
452            });
453        } else {
454            let mut ptr = 0usize;
455            while ptr < data.len() {
456                let rest = &data[ptr..];
457                let line_end = rest.iter().position(|&b| b == b'\n').unwrap_or(rest.len());
458                if line_end >= ATTR_MAX_LINE_LENGTH {
459                    out.push(DotFsckIssue::BlobGitattributes {
460                        blob_oid: *oid,
461                        id: "gitattributesLineLength",
462                        detail: ".gitattributes has too long lines to parse",
463                    });
464                    break;
465                }
466                ptr += line_end;
467                if ptr < data.len() && data[ptr] == b'\n' {
468                    ptr += 1;
469                }
470            }
471        }
472    }
473    out
474}
475
/// Tracks `.gitmodules` / `.gitattributes` blob OIDs discovered in trees (Git `fsck_options` oidsets).
///
/// The `*_found` sets are filled by the tree pass. The `*_done` sets record which of those
/// OIDs have already been handed to the content checks.
#[derive(Debug, Default)]
pub struct DotFsckTracker {
    /// OIDs seen under a `.gitmodules` name on a non-symlink tree entry.
    pub gitmodules_found: HashSet<ObjectId>,
    /// OIDs from `gitmodules_found` whose `.gitmodules` checks have been started.
    pub gitmodules_done: HashSet<ObjectId>,
    /// OIDs seen under a `.gitattributes` name on a non-symlink tree entry.
    pub gitattributes_found: HashSet<ObjectId>,
    /// OIDs from `gitattributes_found` whose `.gitattributes` checks have been started.
    pub gitattributes_done: HashSet<ObjectId>,
}
484
485impl DotFsckTracker {
486    /// Run per-tree registration and symlink checks (`fsck_tree` entry loop).
487    pub fn on_tree(&mut self, tree_oid: &ObjectId, data: &[u8]) -> Result<Vec<DotFsckIssue>> {
488        fsck_dot_special_tree_pass(
489            tree_oid,
490            data,
491            &mut self.gitmodules_found,
492            &mut self.gitattributes_found,
493        )
494    }
495
496    /// Run per-object blob checks when an OID is validated (`fsck_blob`).
497    pub fn on_object(
498        &mut self,
499        oid: &ObjectId,
500        kind: ObjectKind,
501        data: &[u8],
502    ) -> Vec<DotFsckIssue> {
503        let need_gm = self.gitmodules_found.contains(oid) && !self.gitmodules_done.contains(oid);
504        let need_ga =
505            self.gitattributes_found.contains(oid) && !self.gitattributes_done.contains(oid);
506        if !need_gm && !need_ga {
507            return Vec::new();
508        }
509        if need_gm {
510            self.gitmodules_done.insert(*oid);
511        }
512        if need_ga {
513            self.gitattributes_done.insert(*oid);
514        }
515        fsck_dot_special_object(
516            oid,
517            kind,
518            data,
519            &self.gitmodules_found,
520            &self.gitattributes_found,
521        )
522    }
523
524    /// Validate any registered blobs that were not reached during the main walk (`fsck_finish` / `fsck_blobs`).
525    pub fn finish_pending(&mut self, odb: &Odb) -> Result<Vec<DotFsckIssue>> {
526        self.finish_pending_resolve(|oid| odb.read(oid).ok().map(|o| (o.kind, o.data)))
527    }
528
529    /// Like [`Self::finish_pending`], but resolves object bytes via `resolve` (e.g. in-memory pack map).
530    pub fn finish_pending_resolve<F>(&mut self, mut resolve: F) -> Result<Vec<DotFsckIssue>>
531    where
532        F: FnMut(&ObjectId) -> Option<(ObjectKind, Vec<u8>)>,
533    {
534        let mut out = Vec::new();
535        let pending_gm: Vec<ObjectId> = self
536            .gitmodules_found
537            .difference(&self.gitmodules_done)
538            .copied()
539            .collect();
540        let pending_ga: Vec<ObjectId> = self
541            .gitattributes_found
542            .difference(&self.gitattributes_done)
543            .copied()
544            .collect();
545
546        for oid in pending_gm {
547            self.gitmodules_done.insert(oid);
548            let Some((kind, data)) = resolve(&oid) else {
549                continue;
550            };
551            out.extend(fsck_dot_special_object(
552                &oid,
553                kind,
554                &data,
555                &self.gitmodules_found,
556                &self.gitattributes_found,
557            ));
558        }
559        for oid in pending_ga {
560            if self.gitattributes_done.contains(&oid) {
561                continue;
562            }
563            self.gitattributes_done.insert(oid);
564            let Some((kind, data)) = resolve(&oid) else {
565                continue;
566            };
567            out.extend(fsck_dot_special_object(
568                &oid,
569                kind,
570                &data,
571                &self.gitmodules_found,
572                &self.gitattributes_found,
573            ));
574        }
575        Ok(out)
576    }
577}
578
579/// Run `.gitmodules` / `.gitattributes` fsck on a fully resolved pack object map (blob/tree bytes).
580///
581/// Used by `index-pack --strict` and `unpack-objects --strict` so oddly ordered packs still
582/// validate malicious `.gitmodules` content after delta resolution.
583pub fn verify_packed_dot_special(by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>) -> Result<()> {
584    let mut tracker = DotFsckTracker::default();
585    let mut keys: Vec<ObjectId> = by_oid.keys().copied().collect();
586    keys.sort();
587    for oid in keys {
588        let (kind, data) = &by_oid[&oid];
589        if *kind == ObjectKind::Tree {
590            for di in tracker.on_tree(&oid, data)? {
591                if di.is_error_severity() {
592                    return Err(crate::error::Error::CorruptObject(di.format_line()));
593                }
594            }
595        }
596        for di in tracker.on_object(&oid, *kind, data) {
597            if di.is_error_severity() {
598                return Err(crate::error::Error::CorruptObject(di.format_line()));
599            }
600        }
601    }
602    for di in tracker.finish_pending_resolve(|id| by_oid.get(id).map(|(k, d)| (*k, d.clone())))? {
603        if di.is_error_severity() {
604            return Err(crate::error::Error::CorruptObject(di.format_line()));
605        }
606    }
607    Ok(())
608}
609
/// Splits a `"<id>: <detail>"` message into its fsck id and detail text.
///
/// Only the ids `gitmodulesName`, `gitmodulesUrl`, `gitmodulesPath` and `gitmodulesUpdate`
/// are recognised. For anything else, or when there is no `": "` separator, the id is
/// `gitmodules` and the detail is the whole message.
fn split_fsck_colon(msg: &str) -> (&'static str, &str) {
    const KNOWN_IDS: [&str; 4] = [
        "gitmodulesName",
        "gitmodulesUrl",
        "gitmodulesPath",
        "gitmodulesUpdate",
    ];
    msg.split_once(": ")
        .and_then(|(head, tail)| {
            KNOWN_IDS
                .iter()
                .find(|known| **known == head)
                .map(|known| (*known, tail))
        })
        .unwrap_or(("gitmodules", msg))
}
622
623/// Write Git-style warnings for submodule path/url values that look like CLI options.
624pub fn write_gitmodules_cli_option_warnings(
625    w: &mut dyn Write,
626    content: &str,
627) -> std::io::Result<()> {
628    if let Ok(config) = ConfigFile::parse(Path::new(".gitmodules"), content, ConfigScope::Local) {
629        let mut any = false;
630        for entry in &config.entries {
631            let key = &entry.key;
632            let Some(rest) = key.strip_prefix("submodule.") else {
633                continue;
634            };
635            let Some(last_dot) = rest.rfind('.') else {
636                continue;
637            };
638            let var = &rest[last_dot + 1..];
639            if var != "path" && var != "url" {
640                continue;
641            }
642            let Some(value) = entry.value.as_deref() else {
643                continue;
644            };
645            if looks_like_command_line_option(value) {
646                writeln!(
647                    w,
648                    "warning: ignoring '{key}' which may be interpreted as a command-line option: {value}"
649                )?;
650                any = true;
651            }
652        }
653        if any {
654            return Ok(());
655        }
656    }
657
658    // Fallback: raw scan (handles minimal `.gitmodules` that the strict parser rejects).
659    let mut subsection: Option<&str> = None;
660    for line in content.lines() {
661        let trimmed = line.trim();
662        if trimmed.starts_with('[') {
663            subsection = None;
664            if let Some(inner) = trimmed.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
665                let inner = inner.trim();
666                if let Some(rest) = inner.strip_prefix("submodule") {
667                    let rest = rest.trim();
668                    let name = rest
669                        .strip_prefix('"')
670                        .and_then(|s| s.strip_suffix('"'))
671                        .unwrap_or(rest);
672                    if !name.is_empty() {
673                        subsection = Some(name);
674                    }
675                }
676            }
677            continue;
678        }
679        let Some((raw_key, raw_val)) = trimmed.split_once('=') else {
680            continue;
681        };
682        let key = raw_key.trim();
683        if key != "path" && key != "url" {
684            continue;
685        }
686        let mut val = raw_val.trim();
687        if val.len() >= 2 && val.starts_with('"') && val.ends_with('"') {
688            val = &val[1..val.len() - 1];
689        }
690        if looks_like_command_line_option(val) {
691            let key_full = match subsection {
692                Some(name) => format!("submodule.{name}.{key}"),
693                None => key.to_string(),
694            };
695            writeln!(
696                w,
697                "warning: ignoring '{key_full}' which may be interpreted as a command-line option: {val}"
698            )?;
699        }
700    }
701    Ok(())
702}
703
/// Returns `true` when `name` is allowed as a submodule logical name (Git `check_submodule_name`).
///
/// The name must be non-empty and none of its components may be exactly `..`. Both `/` and
/// `\` count as component separators.
#[must_use]
pub fn check_submodule_name(name: &str) -> bool {
    !name.is_empty()
        && name
            .split(|c: char| c == '/' || c == '\\')
            .all(|component| component != "..")
}
736
/// `true` for either directory separator byte: `/` or `\`.
fn is_xplatform_dir_sep(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
740
/// `true` when `url` begins with `..` followed by `/` or `\`.
fn starts_with_dot_dot_slash(url: &str) -> bool {
    matches!(url.as_bytes(), [b'.', b'.', b'/' | b'\\', ..])
}
745
/// `true` when `url` begins with `.` followed by `/` or `\`.
fn starts_with_dot_slash(url: &str) -> bool {
    matches!(url.as_bytes(), [b'.', b'/' | b'\\', ..])
}
750
/// `true` when `url` is written relative to the current location, i.e. it starts with `./`
/// or `../` (either separator accepted).
fn submodule_url_is_relative(url: &str) -> bool {
    starts_with_dot_slash(url) || starts_with_dot_dot_slash(url)
}
754
/// Value (0..=15) of the ASCII hexadecimal digit `b`, accepting both letter cases, or
/// `None` when `b` is not a hex digit.
fn hex_val(b: u8) -> Option<u8> {
    // `to_digit(16)` only yields values below 16, so narrowing to `u8` cannot truncate.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
763
764/// Percent-decode `%XX` sequences (Git `url_decode` subset for submodule URL safety checks).
765fn percent_decode_git_style(input: &str) -> Option<Vec<u8>> {
766    let b = input.as_bytes();
767    let mut out = Vec::with_capacity(b.len());
768    let mut i = 0usize;
769    while i < b.len() {
770        if b[i] == b'%' {
771            if i + 2 >= b.len() {
772                return None;
773            }
774            let hi = hex_val(b[i + 1])?;
775            let lo = hex_val(b[i + 2])?;
776            out.push((hi << 4) | lo);
777            i += 3;
778        } else {
779            out.push(b[i]);
780            i += 1;
781        }
782    }
783    Some(out)
784}
785
/// Git `count_leading_dotdots` / leading `./` stripping (`submodule-config.c`).
///
/// Repeatedly strips leading `../` and `./` prefixes (either separator) from `url`.
/// Returns how many `../` prefixes were removed together with the remaining text. `./`
/// prefixes are removed but not counted.
fn count_leading_dotdots(url: &str) -> (usize, &str) {
    let mut dotdots = 0usize;
    let mut rest = url;
    loop {
        match rest.as_bytes() {
            [b'.', b'.', b'/' | b'\\', ..] => {
                dotdots += 1;
                rest = &rest[3..];
            }
            [b'.', b'/' | b'\\', ..] => rest = &rest[2..],
            _ => return (dotdots, rest),
        }
    }
}
804
/// Maps `url` to the URL that would be handed to the curl-based transport, if any.
///
/// A `http::`, `https::`, `ftp::` or `ftps::` prefix is stripped and the remainder
/// returned. A URL starting with `http://`, `https://`, `ftp://` or `ftps://` is returned
/// unchanged. Everything else yields `None`.
fn url_to_curl_transport_url(url: &str) -> Option<&str> {
    const HELPER_PREFIXES: [&str; 4] = ["http::", "https::", "ftp::", "ftps::"];
    const DIRECT_SCHEMES: [&str; 4] = ["http://", "https://", "ftp://", "ftps://"];

    if let Some(inner) = HELPER_PREFIXES
        .iter()
        .find_map(|prefix| url.strip_prefix(*prefix))
    {
        return Some(inner);
    }
    if DIRECT_SCHEMES.iter().any(|scheme| url.starts_with(*scheme)) {
        Some(url)
    } else {
        None
    }
}
822
823/// Returns `true` when `url` is safe for `.gitmodules` (Git `check_submodule_url`).
824#[must_use]
825pub fn check_submodule_url(url: &str) -> bool {
826    if looks_like_command_line_option(url) {
827        return false;
828    }
829
830    if submodule_url_is_relative(url) || url.starts_with("git://") {
831        let Some(decoded) = percent_decode_git_style(url) else {
832            return false;
833        };
834        if decoded.contains(&b'\n') {
835            return false;
836        }
837        let (n, rest) = count_leading_dotdots(url);
838        if n > 0 {
839            let rb = rest.as_bytes();
840            if !rb.is_empty() && (rb[0] == b':' || rb[0] == b'/') {
841                return false;
842            }
843        }
844        return true;
845    }
846
847    if let Some(curl_url) = url_to_curl_transport_url(url) {
848        if (curl_url.starts_with("http://") || curl_url.starts_with("https://"))
849            && curl_url.contains(":///")
850        {
851            return false;
852        }
853        let Ok(parsed) = Url::parse(curl_url) else {
854            return false;
855        };
856        if !matches!(
857            parsed.scheme(),
858            "http" | "https" | "ftp" | "ftps" | "ws" | "wss"
859        ) {
860            return false;
861        }
862        if parsed.host_str().is_none() {
863            return false;
864        }
865        match parsed.host() {
866            Some(Host::Domain(d)) if d.contains(':') => return false,
867            None => return false,
868            _ => {}
869        }
870        if parsed.path().starts_with(':') {
871            return false;
872        }
873        let normalized = parsed.as_str();
874        let Some(decoded) = percent_decode_git_style(normalized) else {
875            return false;
876        };
877        !decoded.contains(&b'\n')
878    } else {
879        true
880    }
881}
882
/// Max `.gitattributes` line length checked by Git `fsck` (`attr.h`).
///
/// A line of this many bytes or more is reported as `gitattributesLineLength`.
pub const ATTR_MAX_LINE_LENGTH: usize = 2048;

/// Max `.gitattributes` blob size for fsck (`attr.h`): 100 MiB.
///
/// A blob strictly larger than this is reported as `gitattributesLarge`.
pub const ATTR_MAX_FILE_SIZE: usize = 100 * 1024 * 1024;
888
/// Reports whether a submodule `update` setting is the command form, i.e. whether `value`
/// starts with `!` (matching Git fsck). An empty value is never a command.
fn submodule_update_is_command(value: &str) -> bool {
    value.starts_with('!')
}
893
/// Collects submodule names from raw `[submodule ...]` header lines without using the
/// config parser.
///
/// A line counts when, after trimming, it starts with `[` and ends with `]` and the text
/// in between (trimmed) starts with `submodule`. The name is whatever follows, trimmed,
/// with one pair of surrounding double quotes removed when both are present. Empty names
/// are skipped. Names are returned in file order, duplicates included.
fn raw_gitmodules_submodule_names(content: &str) -> Vec<String> {
    content
        .lines()
        .filter_map(|line| {
            let header = line.trim().strip_prefix('[')?.strip_suffix(']')?;
            let rest = header.trim().strip_prefix("submodule")?.trim();
            let name = rest
                .strip_prefix('"')
                .and_then(|s| s.strip_suffix('"'))
                .unwrap_or(rest);
            if name.is_empty() {
                None
            } else {
                Some(name.to_string())
            }
        })
        .collect()
}
919
920/// Validate a `.gitmodules` blob (Git `fsck_gitmodules_fn`). Returns `object hex: msg` or `None`.
921pub fn validate_gitmodules_blob_line(data: &[u8]) -> Option<String> {
922    let Ok(text) = std::str::from_utf8(data) else {
923        return None;
924    };
925
926    let mut worst: Option<String> = None;
927
928    if let Ok(config) = ConfigFile::parse(Path::new(".gitmodules"), text, ConfigScope::Local) {
929        for entry in &config.entries {
930            let key = &entry.key;
931            let Some(rest) = key.strip_prefix("submodule.") else {
932                continue;
933            };
934            let Some(last_dot) = rest.rfind('.') else {
935                continue;
936            };
937            let name = &rest[..last_dot];
938            let var = &rest[last_dot + 1..];
939
940            if !check_submodule_name(name) {
941                worst.get_or_insert_with(|| {
942                    format!("gitmodulesName: disallowed submodule name: {name}")
943                });
944            }
945
946            let Some(value) = entry.value.as_deref() else {
947                continue;
948            };
949
950            match var {
951                "url" => {
952                    if !check_submodule_url(value) {
953                        worst.get_or_insert_with(|| {
954                            format!("gitmodulesUrl: disallowed submodule url: {value}")
955                        });
956                    }
957                }
958                "path" => {
959                    if looks_like_command_line_option(value) {
960                        worst = Some(format!(
961                            "gitmodulesPath: disallowed submodule path: {value}"
962                        ));
963                    }
964                }
965                "update" => {
966                    if submodule_update_is_command(value) {
967                        worst.get_or_insert_with(|| {
968                            format!(
969                                "gitmodulesUpdate: disallowed submodule update setting: {value}"
970                            )
971                        });
972                    }
973                }
974                _ => {}
975            }
976        }
977    }
978
979    // Submodule subsection names can contain `..` and still parse as config lines, but our
980    // canonical key builder rejects those keys — so entries for malicious names are dropped
981    // silently. Always cross-check raw `[submodule "..."]` headers (Git fsck does this via the
982    // real config parser + `check_submodule_name`).
983    for name in raw_gitmodules_submodule_names(text) {
984        if !check_submodule_name(&name) {
985            worst.get_or_insert_with(|| {
986                format!("gitmodulesName: disallowed submodule name: {name}")
987            });
988        }
989    }
990
991    worst
992}
993
994fn collect_gitmodules_blobs_from_tree(
995    odb: &Odb,
996    tree_oid: ObjectId,
997    seen_trees: &mut HashSet<ObjectId>,
998) -> Result<HashSet<ObjectId>> {
999    let mut blobs = HashSet::new();
1000    let mut stack = vec![tree_oid];
1001    while let Some(tid) = stack.pop() {
1002        if !seen_trees.insert(tid) {
1003            continue;
1004        }
1005        let obj = odb.read(&tid)?;
1006        if obj.kind != ObjectKind::Tree {
1007            continue;
1008        }
1009        let entries = parse_tree(&obj.data)?;
1010        for TreeEntry { mode, name, oid } in entries {
1011            if tree_entry_is_gitmodules_blob(mode, &name) {
1012                blobs.insert(oid);
1013            } else if mode == 0o040000 {
1014                stack.push(oid);
1015            }
1016        }
1017    }
1018    Ok(blobs)
1019}
1020
1021/// Validate every `.gitmodules` blob reachable from `commit_oid`. Returns `Some(hex: msg)` on error.
1022pub fn verify_gitmodules_for_commit(odb: &Odb, commit_oid: ObjectId) -> Result<Option<String>> {
1023    let obj = odb.read(&commit_oid)?;
1024    if obj.kind != ObjectKind::Commit {
1025        return Ok(None);
1026    }
1027    let commit = parse_commit(&obj.data)?;
1028    let mut seen_trees = HashSet::new();
1029    let blobs = collect_gitmodules_blobs_from_tree(odb, commit.tree, &mut seen_trees)?;
1030    for oid in blobs {
1031        let blob = odb.read(&oid)?;
1032        if blob.kind != ObjectKind::Blob {
1033            continue;
1034        }
1035        if let Some(msg) = validate_gitmodules_blob_line(&blob.data) {
1036            return Ok(Some(format!("{}: {}", oid.to_hex(), msg)));
1037        }
1038    }
1039    Ok(None)
1040}
1041
1042/// Parse `objects/ab/cdef…` loose paths into OIDs; for `.idx` files load all contained OIDs.
1043pub fn oids_from_copied_object_paths(copied: &[PathBuf]) -> Result<HashSet<ObjectId>> {
1044    let mut out = HashSet::new();
1045    for p in copied {
1046        let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
1047            continue;
1048        };
1049        if name.ends_with(".idx") {
1050            let idx = read_pack_index(p)?;
1051            for e in &idx.entries {
1052                if e.oid.len() == 20 {
1053                    if let Ok(oid) = ObjectId::from_bytes(&e.oid) {
1054                        out.insert(oid);
1055                    }
1056                }
1057            }
1058            continue;
1059        }
1060        if let Some(oid) = object_id_from_loose_object_path(p) {
1061            out.insert(oid);
1062        }
1063    }
1064    Ok(out)
1065}
1066
1067fn object_id_from_loose_object_path(path: &Path) -> Option<ObjectId> {
1068    let file_name = path.file_name()?.to_str()?;
1069    if file_name.len() != 38 {
1070        return None;
1071    }
1072    let parent = path.parent()?.file_name()?.to_str()?;
1073    if parent.len() != 2 {
1074        return None;
1075    }
1076    let hex = format!("{parent}{file_name}");
1077    ObjectId::from_hex(&hex).ok()
1078}