Skip to main content

grit_lib/
gitmodules.rs

1//! `.gitmodules` validation (Git `fsck` / `submodule-config` parity).
2//!
3//! Submodule `path` and `url` values must not look like command-line options
4//! (non-empty and starting with `-`). See Git's `looks_like_command_line_option` in `path.c`.
5
6use std::collections::HashSet;
7use std::io::Write;
8use std::path::Path;
9use std::path::PathBuf;
10
11use crate::config::{ConfigFile, ConfigScope};
12use crate::error::Result;
13use crate::objects::{parse_commit, parse_tree, ObjectId, ObjectKind, TreeEntry};
14use crate::odb::Odb;
15use crate::pack::read_pack_index;
16
/// Reports whether `s` could be mistaken for a command-line option.
///
/// Mirrors Git's `looks_like_command_line_option`: the answer is `true` exactly when
/// `s` begins with `-`. The empty string is never treated as an option.
#[must_use]
pub fn looks_like_command_line_option(s: &str) -> bool {
    s.starts_with('-')
}
22
23/// True when `name` names a `.gitmodules` file (HFS / NTFS spellings), not a symlink.
24#[must_use]
25pub fn tree_entry_is_gitmodules_blob(mode: u32, name: &[u8]) -> bool {
26    if mode == 0o120000 {
27        return false;
28    }
29    let Ok(name_str) = std::str::from_utf8(name) else {
30        return false;
31    };
32    is_hfs_dot_gitmodules(name_str) || is_ntfs_dot_gitmodules(name_str)
33}
34
/// Pulls the next character from `chars` that is not on the ignore list.
///
/// The skipped code points are U+200C..=U+200F, U+202A..=U+202E, U+206A..=U+206F and
/// U+FEFF. Returns `None` once the iterator is exhausted.
fn next_hfs_char(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<char> {
    chars.find(|&ch| {
        !matches!(
            ch,
            '\u{200c}'..='\u{200f}'
                | '\u{202a}'..='\u{202e}'
                | '\u{206a}'..='\u{206f}'
                | '\u{feff}'
        )
    })
}
47
48fn is_hfs_dot_generic(path: &str, needle: &str) -> bool {
49    let mut chars = path.chars().peekable();
50    let mut c = match next_hfs_char(&mut chars) {
51        Some(x) => x,
52        None => return false,
53    };
54    if c != '.' {
55        return false;
56    }
57    for nc in needle.chars() {
58        c = match next_hfs_char(&mut chars) {
59            Some(x) => x,
60            None => return false,
61        };
62        if c as u32 > 127 {
63            return false;
64        }
65        if !c.eq_ignore_ascii_case(&nc) {
66            return false;
67        }
68    }
69    match next_hfs_char(&mut chars) {
70        None => true,
71        Some(ch) if ch == '/' => true,
72        Some(_) => false,
73    }
74}
75
76fn is_hfs_dot_gitmodules(path: &str) -> bool {
77    is_hfs_dot_generic(path, "gitmodules")
78}
79
/// Checks that `name`, from byte offset `i` onward, consists only of spaces and periods.
///
/// Scanning stops successfully at the end of the name, at a NUL byte, or at a `:`.
/// An offset at or past the end of the name counts as an empty tail and yields `true`.
fn only_spaces_and_periods(name: &str, i: usize) -> bool {
    let tail = name.as_bytes().get(i..).unwrap_or(&[]);
    for &c in tail {
        match c {
            0 | b':' => return true,
            b' ' | b'.' => {}
            _ => return false,
        }
    }
    true
}

/// Tests whether `name` is one of the NTFS spellings of `.{dotgit_name}`.
///
/// Three forms are recognised, each optionally followed by trailing spaces/periods
/// (see `only_spaces_and_periods`):
///
/// 1. the long name: `.` followed by `dotgit_name`, ASCII case-insensitive;
/// 2. the regular short name: the first six bytes of `dotgit_name`, then `~1`..`~4`;
/// 3. the fall-back short name: a prefix of `short_prefix` (at most six bytes,
///    case-insensitive), then `~`, a digit `1`..`9`, and only digits up to byte 8.
///
/// All comparisons are done on bytes obtained through checked slicing, so names that
/// contain multi-byte UTF-8 characters (or a `dotgit_name` shorter than six bytes)
/// simply fail to match instead of panicking on a non-character-boundary slice.
fn is_ntfs_dot_generic(name: &str, dotgit_name: &str, short_prefix: &str) -> bool {
    let b = name.as_bytes();
    let dotgit = dotgit_name.as_bytes();
    let len = dotgit.len();

    // Form 1: ".<dotgit_name>" spelled out in full.
    if b.first() == Some(&b'.')
        && b.get(1..1 + len)
            .map_or(false, |head| head.eq_ignore_ascii_case(dotgit))
    {
        return only_spaces_and_periods(name, len + 1);
    }

    // Form 2: regular short name, i.e. six characters followed by ~1 .. ~4.
    if let (Some(head), Some(want)) = (b.get(..6), dotgit.get(..6)) {
        if head.eq_ignore_ascii_case(want)
            && b.get(6) == Some(&b'~')
            && b.get(7).map_or(false, |d| (b'1'..=b'4').contains(d))
        {
            return only_spaces_and_periods(name, 8);
        }
    }

    // Form 3: fall-back short name built from `short_prefix`.
    let prefix = short_prefix.as_bytes();
    let mut i = 0usize;
    let mut saw_tilde = false;
    while i < 8 {
        // Running off the end of the name (or hitting NUL) before byte 8 is a mismatch.
        let c = b.get(i).copied().unwrap_or(0);
        if c == 0 {
            return false;
        }
        if saw_tilde {
            if !c.is_ascii_digit() {
                return false;
            }
        } else if c == b'~' {
            // The tilde must be followed immediately by a non-zero digit.
            i += 1;
            let d = b.get(i).copied().unwrap_or(0);
            if !(b'1'..=b'9').contains(&d) {
                return false;
            }
            saw_tilde = true;
        } else if i >= 6 {
            // No tilde within the first six bytes: not a short name.
            return false;
        } else if !c.is_ascii() {
            // The prefix is ASCII-only, so a high byte can never match.
            return false;
        } else if prefix.get(i).copied() != Some(c.to_ascii_lowercase()) {
            return false;
        }
        i += 1;
    }
    only_spaces_and_periods(name, i)
}
146
147fn is_ntfs_dot_gitmodules(name: &str) -> bool {
148    is_ntfs_dot_generic(name, "gitmodules", "gi7eba")
149}
150
151/// Write Git-style warnings for submodule path/url values that look like CLI options.
152pub fn write_gitmodules_cli_option_warnings(
153    w: &mut dyn Write,
154    content: &str,
155) -> std::io::Result<()> {
156    if let Ok(config) = ConfigFile::parse(Path::new(".gitmodules"), content, ConfigScope::Local) {
157        let mut any = false;
158        for entry in &config.entries {
159            let key = &entry.key;
160            let Some(rest) = key.strip_prefix("submodule.") else {
161                continue;
162            };
163            let Some(last_dot) = rest.rfind('.') else {
164                continue;
165            };
166            let var = &rest[last_dot + 1..];
167            if var != "path" && var != "url" {
168                continue;
169            }
170            let Some(value) = entry.value.as_deref() else {
171                continue;
172            };
173            if looks_like_command_line_option(value) {
174                writeln!(
175                    w,
176                    "warning: ignoring '{key}' which may be interpreted as a command-line option: {value}"
177                )?;
178                any = true;
179            }
180        }
181        if any {
182            return Ok(());
183        }
184    }
185
186    // Fallback: raw scan (handles minimal `.gitmodules` that the strict parser rejects).
187    let mut subsection: Option<&str> = None;
188    for line in content.lines() {
189        let trimmed = line.trim();
190        if trimmed.starts_with('[') {
191            subsection = None;
192            if let Some(inner) = trimmed.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
193                let inner = inner.trim();
194                if let Some(rest) = inner.strip_prefix("submodule") {
195                    let rest = rest.trim();
196                    let name = rest
197                        .strip_prefix('"')
198                        .and_then(|s| s.strip_suffix('"'))
199                        .unwrap_or(rest);
200                    if !name.is_empty() {
201                        subsection = Some(name);
202                    }
203                }
204            }
205            continue;
206        }
207        let Some((raw_key, raw_val)) = trimmed.split_once('=') else {
208            continue;
209        };
210        let key = raw_key.trim();
211        if key != "path" && key != "url" {
212            continue;
213        }
214        let mut val = raw_val.trim();
215        if val.len() >= 2 && val.starts_with('"') && val.ends_with('"') {
216            val = &val[1..val.len() - 1];
217        }
218        if looks_like_command_line_option(val) {
219            let key_full = match subsection {
220                Some(name) => format!("submodule.{name}.{key}"),
221                None => key.to_string(),
222            };
223            writeln!(
224                w,
225                "warning: ignoring '{key_full}' which may be interpreted as a command-line option: {val}"
226            )?;
227        }
228    }
229    Ok(())
230}
231
/// Returns `true` when `name` is an acceptable submodule name.
///
/// A name is rejected when it is empty or when any of its components is exactly `..`.
/// Both `/` and `\` count as component separators (Git `check_submodule_name`).
fn check_submodule_name(name: &str) -> bool {
    !name.is_empty()
        && name
            .split(|sep: char| sep == '/' || sep == '\\')
            .all(|component| component != "..")
}
262
/// Reports whether a submodule `update` setting is of the command form, i.e. begins
/// with `!` (matching Git fsck). The empty string is not a command.
fn submodule_update_is_command(value: &str) -> bool {
    value.starts_with('!')
}
267
268/// Validate a `.gitmodules` blob (Git `fsck_gitmodules_fn`). Returns `object hex: msg` or `None`.
269pub fn validate_gitmodules_blob_line(data: &[u8]) -> Option<String> {
270    let Ok(text) = std::str::from_utf8(data) else {
271        return None;
272    };
273    let config = ConfigFile::parse(Path::new(".gitmodules"), text, ConfigScope::Local).ok()?;
274
275    let mut worst: Option<String> = None;
276
277    for entry in &config.entries {
278        let key = &entry.key;
279        let Some(rest) = key.strip_prefix("submodule.") else {
280            continue;
281        };
282        let Some(last_dot) = rest.rfind('.') else {
283            continue;
284        };
285        let name = &rest[..last_dot];
286        let var = &rest[last_dot + 1..];
287
288        if !check_submodule_name(name) {
289            worst.get_or_insert_with(|| {
290                format!("gitmodulesName: disallowed submodule name: {name}")
291            });
292        }
293
294        let Some(value) = entry.value.as_deref() else {
295            continue;
296        };
297
298        match var {
299            "url" => {
300                if looks_like_command_line_option(value) {
301                    worst.get_or_insert_with(|| {
302                        format!("gitmodulesUrl: disallowed submodule url: {value}")
303                    });
304                }
305            }
306            "path" => {
307                if looks_like_command_line_option(value) {
308                    worst = Some(format!(
309                        "gitmodulesPath: disallowed submodule path: {value}"
310                    ));
311                }
312            }
313            "update" => {
314                if submodule_update_is_command(value) {
315                    worst.get_or_insert_with(|| {
316                        format!("gitmodulesUpdate: disallowed submodule update setting: {value}")
317                    });
318                }
319            }
320            _ => {}
321        }
322    }
323
324    worst
325}
326
327fn collect_gitmodules_blobs_from_tree(
328    odb: &Odb,
329    tree_oid: ObjectId,
330    seen_trees: &mut HashSet<ObjectId>,
331) -> Result<HashSet<ObjectId>> {
332    let mut blobs = HashSet::new();
333    let mut stack = vec![tree_oid];
334    while let Some(tid) = stack.pop() {
335        if !seen_trees.insert(tid) {
336            continue;
337        }
338        let obj = odb.read(&tid)?;
339        if obj.kind != ObjectKind::Tree {
340            continue;
341        }
342        let entries = parse_tree(&obj.data)?;
343        for TreeEntry { mode, name, oid } in entries {
344            if tree_entry_is_gitmodules_blob(mode, &name) {
345                blobs.insert(oid);
346            } else if mode == 0o040000 {
347                stack.push(oid);
348            }
349        }
350    }
351    Ok(blobs)
352}
353
354/// Validate every `.gitmodules` blob reachable from `commit_oid`. Returns `Some(hex: msg)` on error.
355pub fn verify_gitmodules_for_commit(odb: &Odb, commit_oid: ObjectId) -> Result<Option<String>> {
356    let obj = odb.read(&commit_oid)?;
357    if obj.kind != ObjectKind::Commit {
358        return Ok(None);
359    }
360    let commit = parse_commit(&obj.data)?;
361    let mut seen_trees = HashSet::new();
362    let blobs = collect_gitmodules_blobs_from_tree(odb, commit.tree, &mut seen_trees)?;
363    for oid in blobs {
364        let blob = odb.read(&oid)?;
365        if blob.kind != ObjectKind::Blob {
366            continue;
367        }
368        if let Some(msg) = validate_gitmodules_blob_line(&blob.data) {
369            return Ok(Some(format!("{}: {}", oid.to_hex(), msg)));
370        }
371    }
372    Ok(None)
373}
374
375/// Parse `objects/ab/cdef…` loose paths into OIDs; for `.idx` files load all contained OIDs.
376pub fn oids_from_copied_object_paths(copied: &[PathBuf]) -> Result<HashSet<ObjectId>> {
377    let mut out = HashSet::new();
378    for p in copied {
379        let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
380            continue;
381        };
382        if name.ends_with(".idx") {
383            let idx = read_pack_index(p)?;
384            for e in &idx.entries {
385                out.insert(e.oid);
386            }
387            continue;
388        }
389        if let Some(oid) = object_id_from_loose_object_path(p) {
390            out.insert(oid);
391        }
392    }
393    Ok(out)
394}
395
396fn object_id_from_loose_object_path(path: &Path) -> Option<ObjectId> {
397    let file_name = path.file_name()?.to_str()?;
398    if file_name.len() != 38 {
399        return None;
400    }
401    let parent = path.parent()?.file_name()?.to_str()?;
402    if parent.len() != 2 {
403        return None;
404    }
405    let hex = format!("{parent}{file_name}");
406    ObjectId::from_hex(&hex).ok()
407}