1use std::collections::{HashMap, HashSet};
10use std::io::Write;
11use std::path::Path;
12use std::path::PathBuf;
13
14use crate::config::{ConfigFile, ConfigScope};
15use crate::error::Result;
16use crate::objects::{parse_commit, parse_tree, ObjectId, ObjectKind, TreeEntry};
17use crate::odb::Odb;
18use crate::pack::read_pack_index;
19use url::{Host, Url};
20
/// Returns `true` when `s` begins with `-`, i.e. when it could be mistaken
/// for a command-line option if passed as an argument.
///
/// The empty string is never considered an option.
#[must_use]
pub fn looks_like_command_line_option(s: &str) -> bool {
    s.starts_with('-')
}
26
27#[must_use]
29pub fn tree_entry_is_gitmodules_blob(mode: u32, name: &[u8]) -> bool {
30 if mode == 0o120000 {
31 return false;
32 }
33 let Ok(name_str) = std::str::from_utf8(name) else {
34 return false;
35 };
36 is_hfs_dot_gitmodules(name_str) || is_ntfs_dot_gitmodules(name_str)
37}
38
/// Pulls the next character from `chars`, silently discarding the code
/// points this module treats as ignorable for HFS-style comparisons:
/// U+200C..=U+200F, U+202A..=U+202E, U+206A..=U+206F and U+FEFF.
///
/// Returns `None` once the iterator is exhausted. Every skipped character and
/// the returned one are consumed from `chars`.
fn next_hfs_char(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<char> {
    chars.find(|&ch| {
        !matches!(
            ch,
            '\u{200c}'..='\u{200f}'
                | '\u{202a}'..='\u{202e}'
                | '\u{206a}'..='\u{206f}'
                | '\u{feff}'
        )
    })
}
51
52fn is_hfs_dot_generic(path: &str, needle: &str) -> bool {
53 let mut chars = path.chars().peekable();
54 let mut c = match next_hfs_char(&mut chars) {
55 Some(x) => x,
56 None => return false,
57 };
58 if c != '.' {
59 return false;
60 }
61 for nc in needle.chars() {
62 c = match next_hfs_char(&mut chars) {
63 Some(x) => x,
64 None => return false,
65 };
66 if c as u32 > 127 {
67 return false;
68 }
69 if !c.eq_ignore_ascii_case(&nc) {
70 return false;
71 }
72 }
73 match next_hfs_char(&mut chars) {
74 None => true,
75 Some(ch) if ch == '/' => true,
76 Some(_) => false,
77 }
78}
79
80fn is_hfs_dot_gitmodules(path: &str) -> bool {
81 is_hfs_dot_generic(path, "gitmodules")
82}
83
/// Returns `true` when everything in `name` from byte offset `i` onward
/// consists only of spaces and periods, up to the end of the name, a NUL
/// byte, or a `:` (whichever comes first).
///
/// An offset at or past the end of `name` counts as "nothing left" and
/// yields `true`.
fn only_spaces_and_periods(name: &str, i: usize) -> bool {
    for &c in name.as_bytes().get(i..).unwrap_or(&[]) {
        match c {
            // A NUL or a colon terminates the part of the name we care about.
            0 | b':' => return true,
            b' ' | b'.' => {}
            _ => return false,
        }
    }
    true
}

/// Checks whether `name` is an NTFS-style alias of `.` + `dotgit_name`.
///
/// Three forms are accepted, each optionally followed by a run of spaces and
/// periods and/or a `:`-introduced suffix (see `only_spaces_and_periods`):
///
/// 1. `.` followed by `dotgit_name`, compared ASCII case-insensitively;
/// 2. the first six bytes of `dotgit_name` followed by `~1` … `~4`;
/// 3. a prefix of `short_prefix` (compared lowercased, at most six bytes)
///    followed by `~`, a digit `1`–`9`, and further digits up to eight bytes
///    in total.
///
/// All comparisons are done on bytes. Slicing the `&str` at fixed byte
/// offsets would panic when a multi-byte character straddles the offset,
/// and names come from untrusted tree objects.
fn is_ntfs_dot_generic(name: &str, dotgit_name: &str, short_prefix: &str) -> bool {
    let b = name.as_bytes();
    let needle = dotgit_name.as_bytes();
    let len = needle.len();

    // Form 1: ".<dotgit_name>" (`b.len() > len` keeps `1..=len` in bounds).
    if b.first() == Some(&b'.') && b.len() > len && b[1..=len].eq_ignore_ascii_case(needle) {
        return only_spaces_and_periods(name, len + 1);
    }

    // Form 2: "<first six bytes of dotgit_name>~<1-4>".
    if let (Some(head), Some(stem)) = (b.get(..6), needle.get(..6)) {
        if b.len() >= 8
            && head.eq_ignore_ascii_case(stem)
            && b[6] == b'~'
            && (b'1'..=b'4').contains(&b[7])
        {
            return only_spaces_and_periods(name, 8);
        }
    }

    // Form 3: "<prefix of short_prefix>~<digits>", scanned over the first
    // eight bytes. A missing byte is read as NUL and rejects the name.
    let prefix = short_prefix.as_bytes();
    let mut i = 0usize;
    let mut saw_tilde = false;
    while i < 8 {
        let c = b.get(i).copied().unwrap_or(0);
        if c == 0 {
            return false;
        }
        if saw_tilde {
            if !c.is_ascii_digit() {
                return false;
            }
        } else if c == b'~' {
            // The first digit after the tilde must be 1-9 (no leading zero).
            i += 1;
            let d = b.get(i).copied().unwrap_or(0);
            if !(b'1'..=b'9').contains(&d) {
                return false;
            }
            saw_tilde = true;
        } else if i >= 6 || !c.is_ascii() {
            // No tilde within the first six bytes, or a non-ASCII byte.
            return false;
        } else if c.to_ascii_lowercase() != prefix.get(i).copied().unwrap_or(0) {
            return false;
        }
        i += 1;
    }
    only_spaces_and_periods(name, i)
}
150
151fn is_ntfs_dot_gitmodules(name: &str) -> bool {
152 is_ntfs_dot_generic(name, "gitmodules", "gi7eba")
153}
154
155fn is_hfs_dot_gitattributes(path: &str) -> bool {
156 is_hfs_dot_generic(path, "gitattributes")
157}
158
159fn is_ntfs_dot_gitattributes(name: &str) -> bool {
160 is_ntfs_dot_generic(name, "gitattributes", "gi7d29")
161}
162
163fn is_hfs_dot_gitignore(path: &str) -> bool {
164 is_hfs_dot_generic(path, "gitignore")
165}
166
167fn is_ntfs_dot_gitignore(name: &str) -> bool {
168 is_ntfs_dot_generic(name, "gitignore", "gi250a")
169}
170
171fn is_hfs_dot_mailmap(path: &str) -> bool {
172 is_hfs_dot_generic(path, "mailmap")
173}
174
175fn is_ntfs_dot_mailmap(name: &str) -> bool {
176 is_ntfs_dot_generic(name, "mailmap", "maba30")
177}
178
179#[must_use]
181pub fn tree_entry_is_gitattributes_blob(mode: u32, name: &[u8]) -> bool {
182 if mode == 0o120000 {
183 return false;
184 }
185 let Ok(name_str) = std::str::from_utf8(name) else {
186 return false;
187 };
188 is_hfs_dot_gitattributes(name_str) || is_ntfs_dot_gitattributes(name_str)
189}
190
191fn is_hfs_or_ntfs_dot_gitmodules(name: &str) -> bool {
192 is_hfs_dot_gitmodules(name) || is_ntfs_dot_gitmodules(name)
193}
194
195fn is_hfs_or_ntfs_dot_gitattributes(name: &str) -> bool {
196 is_hfs_dot_gitattributes(name) || is_ntfs_dot_gitattributes(name)
197}
198
199pub fn fsck_dot_special_tree_pass(
201 tree_oid: &ObjectId,
202 data: &[u8],
203 gitmodules_out: &mut HashSet<ObjectId>,
204 gitattributes_out: &mut HashSet<ObjectId>,
205) -> Result<Vec<DotFsckIssue>> {
206 let entries = parse_tree(data)?;
207 let mut issues = Vec::new();
208 for TreeEntry { mode, name, oid } in entries {
209 let Ok(name_str) = std::str::from_utf8(&name) else {
210 continue;
211 };
212 let is_symlink = mode == 0o120000;
213
214 if is_hfs_or_ntfs_dot_gitmodules(name_str) {
215 if is_symlink {
216 issues.push(DotFsckIssue::TreeSymlink {
217 tree_oid: *tree_oid,
218 id: "gitmodulesSymlink",
219 detail: ".gitmodules is a symbolic link",
220 });
221 } else {
222 gitmodules_out.insert(oid);
223 }
224 }
225
226 if is_hfs_or_ntfs_dot_gitattributes(name_str) {
227 if is_symlink {
228 issues.push(DotFsckIssue::TreeSymlink {
229 tree_oid: *tree_oid,
230 id: "gitattributesSymlink",
231 detail: ".gitattributes is a symlink",
232 });
233 } else {
234 gitattributes_out.insert(oid);
235 }
236 }
237
238 if is_symlink {
239 if is_hfs_dot_gitignore(name_str) || is_ntfs_dot_gitignore(name_str) {
240 issues.push(DotFsckIssue::TreeSymlink {
241 tree_oid: *tree_oid,
242 id: "gitignoreSymlink",
243 detail: ".gitignore is a symlink",
244 });
245 }
246 if is_hfs_dot_mailmap(name_str) || is_ntfs_dot_mailmap(name_str) {
247 issues.push(DotFsckIssue::TreeSymlink {
248 tree_oid: *tree_oid,
249 id: "mailmapSymlink",
250 detail: ".mailmap is a symlink",
251 });
252 }
253 }
254
255 let mut slash_rest = name_str;
256 while let Some(idx) = slash_rest.find('\\') {
257 let after = &slash_rest[idx + 1..];
258 if is_ntfs_dot_gitmodules(after) {
259 if is_symlink {
260 issues.push(DotFsckIssue::TreeSymlink {
261 tree_oid: *tree_oid,
262 id: "gitmodulesSymlink",
263 detail: ".gitmodules is a symbolic link",
264 });
265 } else {
266 gitmodules_out.insert(oid);
267 }
268 }
269 slash_rest = after;
270 }
271 }
272 Ok(issues)
273}
274
275#[derive(Debug, Clone)]
277pub enum DotFsckIssue {
278 TreeSymlink {
279 tree_oid: ObjectId,
280 id: &'static str,
281 detail: &'static str,
282 },
283 NonBlobDotFile {
284 oid: ObjectId,
285 kind: ObjectKind,
286 id: &'static str,
287 detail: &'static str,
288 },
289 BlobGitmodules {
290 blob_oid: ObjectId,
291 id: &'static str,
292 detail: String,
293 },
294 BlobGitattributes {
295 blob_oid: ObjectId,
296 id: &'static str,
297 detail: &'static str,
298 },
299}
300
301impl DotFsckIssue {
302 #[must_use]
304 pub fn format_line(&self) -> String {
305 match self {
306 DotFsckIssue::TreeSymlink {
307 tree_oid,
308 id,
309 detail,
310 } => {
311 let prefix = if *id == "gitmodulesSymlink" {
312 "error"
313 } else {
314 "warning"
315 };
316 format!("{prefix} in tree {}: {}: {}", tree_oid.to_hex(), id, detail)
317 }
318 DotFsckIssue::NonBlobDotFile {
319 oid,
320 kind,
321 id,
322 detail,
323 } => format!(
324 "error in {} {}: {}: {}",
325 kind.as_str(),
326 oid.to_hex(),
327 id,
328 detail
329 ),
330 DotFsckIssue::BlobGitmodules {
331 blob_oid,
332 id,
333 detail,
334 } => {
335 let prefix = if *id == "gitmodulesParse" {
336 "warning"
337 } else {
338 "error"
339 };
340 format!("{prefix} in blob {}: {}: {}", blob_oid.to_hex(), id, detail)
341 }
342 DotFsckIssue::BlobGitattributes {
343 blob_oid,
344 id,
345 detail,
346 } => format!("error in blob {}: {}: {}", blob_oid.to_hex(), id, detail),
347 }
348 }
349
350 #[must_use]
352 pub fn is_error_severity(&self) -> bool {
353 !matches!(
354 self,
355 DotFsckIssue::BlobGitmodules {
356 id: "gitmodulesParse",
357 ..
358 } | DotFsckIssue::TreeSymlink {
359 id: "gitattributesSymlink" | "gitignoreSymlink" | "mailmapSymlink",
360 ..
361 }
362 )
363 }
364}
365
366fn gitmodules_blob_unparseable(data: &[u8]) -> bool {
368 for raw in data.split(|b| *b == b'\n') {
369 let line = trim_bytes(raw);
370 if line.is_empty() || line[0] == b'#' || line[0] == b';' {
371 continue;
372 }
373 if line.first() == Some(&b'[') && !line.contains(&b']') {
374 return true;
375 }
376 }
377 false
378}
379
/// Trims ASCII blanks from both ends of a byte slice.
///
/// Leading spaces and tabs are removed; trailing spaces, tabs and carriage
/// returns are removed. A carriage return at the *start* is kept unless the
/// trailing trim reaches it.
fn trim_bytes(s: &[u8]) -> &[u8] {
    let start = s
        .iter()
        .position(|&b| !matches!(b, b' ' | b'\t'))
        .unwrap_or(s.len());
    let tail = &s[start..];
    let end = tail
        .iter()
        .rposition(|&b| !matches!(b, b' ' | b'\t' | b'\r'))
        .map_or(0, |last| last + 1);
    &tail[..end]
}
397
398pub fn fsck_dot_special_object(
400 oid: &ObjectId,
401 kind: ObjectKind,
402 data: &[u8],
403 gitmodules_oids: &HashSet<ObjectId>,
404 gitattributes_oids: &HashSet<ObjectId>,
405) -> Vec<DotFsckIssue> {
406 let mut out = Vec::new();
407 if gitmodules_oids.contains(oid) {
408 if kind != ObjectKind::Blob {
409 out.push(DotFsckIssue::NonBlobDotFile {
410 oid: *oid,
411 kind,
412 id: "gitmodulesBlob",
413 detail: "non-blob found at .gitmodules",
414 });
415 return out;
416 }
417 if let Some(msg) = validate_gitmodules_blob_line(data) {
418 let (id, detail) = split_fsck_colon(&msg);
419 out.push(DotFsckIssue::BlobGitmodules {
420 blob_oid: *oid,
421 id,
422 detail: detail.to_string(),
423 });
424 } else {
425 let text = std::str::from_utf8(data).unwrap_or("");
426 let strict_bad =
427 ConfigFile::parse(Path::new(".gitmodules"), text, ConfigScope::Local).is_err();
428 if strict_bad || gitmodules_blob_unparseable(data) {
429 out.push(DotFsckIssue::BlobGitmodules {
430 blob_oid: *oid,
431 id: "gitmodulesParse",
432 detail: "could not parse gitmodules blob".to_string(),
433 });
434 }
435 }
436 }
437 if gitattributes_oids.contains(oid) {
438 if kind != ObjectKind::Blob {
439 out.push(DotFsckIssue::NonBlobDotFile {
440 oid: *oid,
441 kind,
442 id: "gitattributesBlob",
443 detail: "non-blob found at .gitattributes",
444 });
445 return out;
446 }
447 if data.len() > ATTR_MAX_FILE_SIZE {
448 out.push(DotFsckIssue::BlobGitattributes {
449 blob_oid: *oid,
450 id: "gitattributesLarge",
451 detail: ".gitattributes too large to parse",
452 });
453 } else {
454 let mut ptr = 0usize;
455 while ptr < data.len() {
456 let rest = &data[ptr..];
457 let line_end = rest.iter().position(|&b| b == b'\n').unwrap_or(rest.len());
458 if line_end >= ATTR_MAX_LINE_LENGTH {
459 out.push(DotFsckIssue::BlobGitattributes {
460 blob_oid: *oid,
461 id: "gitattributesLineLength",
462 detail: ".gitattributes has too long lines to parse",
463 });
464 break;
465 }
466 ptr += line_end;
467 if ptr < data.len() && data[ptr] == b'\n' {
468 ptr += 1;
469 }
470 }
471 }
472 }
473 out
474}
475
476#[derive(Debug, Default)]
478pub struct DotFsckTracker {
479 pub gitmodules_found: HashSet<ObjectId>,
480 pub gitmodules_done: HashSet<ObjectId>,
481 pub gitattributes_found: HashSet<ObjectId>,
482 pub gitattributes_done: HashSet<ObjectId>,
483}
484
485impl DotFsckTracker {
486 pub fn on_tree(&mut self, tree_oid: &ObjectId, data: &[u8]) -> Result<Vec<DotFsckIssue>> {
488 fsck_dot_special_tree_pass(
489 tree_oid,
490 data,
491 &mut self.gitmodules_found,
492 &mut self.gitattributes_found,
493 )
494 }
495
496 pub fn on_object(
498 &mut self,
499 oid: &ObjectId,
500 kind: ObjectKind,
501 data: &[u8],
502 ) -> Vec<DotFsckIssue> {
503 let need_gm = self.gitmodules_found.contains(oid) && !self.gitmodules_done.contains(oid);
504 let need_ga =
505 self.gitattributes_found.contains(oid) && !self.gitattributes_done.contains(oid);
506 if !need_gm && !need_ga {
507 return Vec::new();
508 }
509 if need_gm {
510 self.gitmodules_done.insert(*oid);
511 }
512 if need_ga {
513 self.gitattributes_done.insert(*oid);
514 }
515 fsck_dot_special_object(
516 oid,
517 kind,
518 data,
519 &self.gitmodules_found,
520 &self.gitattributes_found,
521 )
522 }
523
524 pub fn finish_pending(&mut self, odb: &Odb) -> Result<Vec<DotFsckIssue>> {
526 self.finish_pending_resolve(|oid| odb.read(oid).ok().map(|o| (o.kind, o.data)))
527 }
528
529 pub fn finish_pending_resolve<F>(&mut self, mut resolve: F) -> Result<Vec<DotFsckIssue>>
531 where
532 F: FnMut(&ObjectId) -> Option<(ObjectKind, Vec<u8>)>,
533 {
534 let mut out = Vec::new();
535 let pending_gm: Vec<ObjectId> = self
536 .gitmodules_found
537 .difference(&self.gitmodules_done)
538 .copied()
539 .collect();
540 let pending_ga: Vec<ObjectId> = self
541 .gitattributes_found
542 .difference(&self.gitattributes_done)
543 .copied()
544 .collect();
545
546 for oid in pending_gm {
547 self.gitmodules_done.insert(oid);
548 let Some((kind, data)) = resolve(&oid) else {
549 continue;
550 };
551 out.extend(fsck_dot_special_object(
552 &oid,
553 kind,
554 &data,
555 &self.gitmodules_found,
556 &self.gitattributes_found,
557 ));
558 }
559 for oid in pending_ga {
560 if self.gitattributes_done.contains(&oid) {
561 continue;
562 }
563 self.gitattributes_done.insert(oid);
564 let Some((kind, data)) = resolve(&oid) else {
565 continue;
566 };
567 out.extend(fsck_dot_special_object(
568 &oid,
569 kind,
570 &data,
571 &self.gitmodules_found,
572 &self.gitattributes_found,
573 ));
574 }
575 Ok(out)
576 }
577}
578
579pub fn verify_packed_dot_special(by_oid: &HashMap<ObjectId, (ObjectKind, Vec<u8>)>) -> Result<()> {
584 let mut tracker = DotFsckTracker::default();
585 let mut keys: Vec<ObjectId> = by_oid.keys().copied().collect();
586 keys.sort();
587 for oid in keys {
588 let (kind, data) = &by_oid[&oid];
589 if *kind == ObjectKind::Tree {
590 for di in tracker.on_tree(&oid, data)? {
591 if di.is_error_severity() {
592 return Err(crate::error::Error::CorruptObject(di.format_line()));
593 }
594 }
595 }
596 for di in tracker.on_object(&oid, *kind, data) {
597 if di.is_error_severity() {
598 return Err(crate::error::Error::CorruptObject(di.format_line()));
599 }
600 }
601 }
602 for di in tracker.finish_pending_resolve(|id| by_oid.get(id).map(|(k, d)| (*k, d.clone())))? {
603 if di.is_error_severity() {
604 return Err(crate::error::Error::CorruptObject(di.format_line()));
605 }
606 }
607 Ok(())
608}
609
/// Splits a `"<id>: <detail>"` message into its id and detail.
///
/// Only the ids `gitmodulesName`, `gitmodulesUrl`, `gitmodulesPath` and
/// `gitmodulesUpdate` are recognised. For any other prefix, or when the
/// message has no `": "` separator, the id is `"gitmodules"` and the whole
/// message is returned as the detail.
fn split_fsck_colon(msg: &str) -> (&'static str, &str) {
    const KNOWN_IDS: [&str; 4] = [
        "gitmodulesName",
        "gitmodulesUrl",
        "gitmodulesPath",
        "gitmodulesUpdate",
    ];
    msg.split_once(": ")
        .and_then(|(head, tail)| {
            KNOWN_IDS
                .iter()
                .find(|&&known| known == head)
                .map(|&known| (known, tail))
        })
        .unwrap_or(("gitmodules", msg))
}
622
623pub fn write_gitmodules_cli_option_warnings(
625 w: &mut dyn Write,
626 content: &str,
627) -> std::io::Result<()> {
628 if let Ok(config) = ConfigFile::parse(Path::new(".gitmodules"), content, ConfigScope::Local) {
629 let mut any = false;
630 for entry in &config.entries {
631 let key = &entry.key;
632 let Some(rest) = key.strip_prefix("submodule.") else {
633 continue;
634 };
635 let Some(last_dot) = rest.rfind('.') else {
636 continue;
637 };
638 let var = &rest[last_dot + 1..];
639 if var != "path" && var != "url" {
640 continue;
641 }
642 let Some(value) = entry.value.as_deref() else {
643 continue;
644 };
645 if looks_like_command_line_option(value) {
646 writeln!(
647 w,
648 "warning: ignoring '{key}' which may be interpreted as a command-line option: {value}"
649 )?;
650 any = true;
651 }
652 }
653 if any {
654 return Ok(());
655 }
656 }
657
658 let mut subsection: Option<&str> = None;
660 for line in content.lines() {
661 let trimmed = line.trim();
662 if trimmed.starts_with('[') {
663 subsection = None;
664 if let Some(inner) = trimmed.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
665 let inner = inner.trim();
666 if let Some(rest) = inner.strip_prefix("submodule") {
667 let rest = rest.trim();
668 let name = rest
669 .strip_prefix('"')
670 .and_then(|s| s.strip_suffix('"'))
671 .unwrap_or(rest);
672 if !name.is_empty() {
673 subsection = Some(name);
674 }
675 }
676 }
677 continue;
678 }
679 let Some((raw_key, raw_val)) = trimmed.split_once('=') else {
680 continue;
681 };
682 let key = raw_key.trim();
683 if key != "path" && key != "url" {
684 continue;
685 }
686 let mut val = raw_val.trim();
687 if val.len() >= 2 && val.starts_with('"') && val.ends_with('"') {
688 val = &val[1..val.len() - 1];
689 }
690 if looks_like_command_line_option(val) {
691 let key_full = match subsection {
692 Some(name) => format!("submodule.{name}.{key}"),
693 None => key.to_string(),
694 };
695 writeln!(
696 w,
697 "warning: ignoring '{key_full}' which may be interpreted as a command-line option: {val}"
698 )?;
699 }
700 }
701 Ok(())
702}
703
/// Validates a submodule name.
///
/// Returns `false` for the empty name and for any name that contains `..` as
/// a complete path component, where both `/` and `\` count as separators.
/// Everything else is accepted.
#[must_use]
pub fn check_submodule_name(name: &str) -> bool {
    !name.is_empty()
        && name
            .split(|c: char| c == '/' || c == '\\')
            .all(|component| component != "..")
}
736
/// Returns `true` for either directory separator, `/` or `\`, regardless of
/// the host platform.
fn is_xplatform_dir_sep(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
740
/// Returns `true` when `url` begins with `..` followed by `/` or `\`.
fn starts_with_dot_dot_slash(url: &str) -> bool {
    matches!(url.as_bytes(), [b'.', b'.', b'/' | b'\\', ..])
}
745
/// Returns `true` when `url` begins with `.` followed by `/` or `\`.
fn starts_with_dot_slash(url: &str) -> bool {
    matches!(url.as_bytes(), [b'.', b'/' | b'\\', ..])
}
750
/// Returns `true` when `url` is written relative to the current location,
/// i.e. starts with `./` or `../` (with `\` accepted in place of `/`).
fn submodule_url_is_relative(url: &str) -> bool {
    matches!(
        url.as_bytes(),
        [b'.', b'/' | b'\\', ..] | [b'.', b'.', b'/' | b'\\', ..]
    )
}
754
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric
/// value, or returns `None` for any other byte.
fn hex_val(b: u8) -> Option<u8> {
    // `to_digit(16)` yields at most 15, so the narrowing cast cannot truncate.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
763
/// Decodes `%XX` escapes in `input` into raw bytes.
///
/// Bytes other than `%` are copied through unchanged. Returns `None` when a
/// `%` is not followed by two hexadecimal digits (including a `%` too close
/// to the end of the input).
fn percent_decode_git_style(input: &str) -> Option<Vec<u8>> {
    let nibble = |digit: u8| char::from(digit).to_digit(16).map(|v| v as u8);

    let mut decoded = Vec::with_capacity(input.len());
    let mut rest = input.as_bytes();
    while let Some((&first, tail)) = rest.split_first() {
        if first != b'%' {
            decoded.push(first);
            rest = tail;
            continue;
        }
        // An escape needs exactly two more bytes.
        let [hi, lo, ..] = tail else {
            return None;
        };
        let hi = nibble(*hi)?;
        let lo = nibble(*lo)?;
        decoded.push((hi << 4) | lo);
        rest = &tail[2..];
    }
    Some(decoded)
}
785
/// Strips every leading `../` and `./` component from `url` (with `\`
/// accepted as a separator) and returns how many `../` components were
/// removed together with the remaining text.
///
/// `./` components are removed but not counted.
fn count_leading_dotdots(url: &str) -> (usize, &str) {
    let mut ups = 0usize;
    let mut tail = url;
    loop {
        match tail.as_bytes() {
            [b'.', b'.', b'/' | b'\\', ..] => {
                ups += 1;
                tail = &tail[3..];
            }
            [b'.', b'/' | b'\\', ..] => tail = &tail[2..],
            _ => return (ups, tail),
        }
    }
}
804
/// Extracts the URL that would be handled by the HTTP(S)/FTP(S) transport.
///
/// * A `http::`, `https::`, `ftp::` or `ftps::` prefix is stripped and the
///   remainder returned.
/// * A URL starting with `http://`, `https://`, `ftp://` or `ftps://` is
///   returned unchanged.
/// * Anything else yields `None`.
fn url_to_curl_transport_url(url: &str) -> Option<&str> {
    const HELPER_PREFIXES: [&str; 4] = ["http::", "https::", "ftp::", "ftps::"];
    const DIRECT_SCHEMES: [&str; 4] = ["http://", "https://", "ftp://", "ftps://"];

    if let Some(stripped) = HELPER_PREFIXES
        .iter()
        .find_map(|&prefix| url.strip_prefix(prefix))
    {
        return Some(stripped);
    }
    DIRECT_SCHEMES
        .iter()
        .any(|&scheme| url.starts_with(scheme))
        .then_some(url)
}
822
823#[must_use]
825pub fn check_submodule_url(url: &str) -> bool {
826 if looks_like_command_line_option(url) {
827 return false;
828 }
829
830 if submodule_url_is_relative(url) || url.starts_with("git://") {
831 let Some(decoded) = percent_decode_git_style(url) else {
832 return false;
833 };
834 if decoded.contains(&b'\n') {
835 return false;
836 }
837 let (n, rest) = count_leading_dotdots(url);
838 if n > 0 {
839 let rb = rest.as_bytes();
840 if !rb.is_empty() && (rb[0] == b':' || rb[0] == b'/') {
841 return false;
842 }
843 }
844 return true;
845 }
846
847 if let Some(curl_url) = url_to_curl_transport_url(url) {
848 if (curl_url.starts_with("http://") || curl_url.starts_with("https://"))
849 && curl_url.contains(":///")
850 {
851 return false;
852 }
853 let Ok(parsed) = Url::parse(curl_url) else {
854 return false;
855 };
856 if !matches!(
857 parsed.scheme(),
858 "http" | "https" | "ftp" | "ftps" | "ws" | "wss"
859 ) {
860 return false;
861 }
862 if parsed.host_str().is_none() {
863 return false;
864 }
865 match parsed.host() {
866 Some(Host::Domain(d)) if d.contains(':') => return false,
867 None => return false,
868 _ => {}
869 }
870 if parsed.path().starts_with(':') {
871 return false;
872 }
873 let normalized = parsed.as_str();
874 let Some(decoded) = percent_decode_git_style(normalized) else {
875 return false;
876 };
877 !decoded.contains(&b'\n')
878 } else {
879 true
880 }
881}
882
/// Line-length limit for `.gitattributes` blobs, in bytes: a line of this
/// length or longer is reported as `gitattributesLineLength`.
pub const ATTR_MAX_LINE_LENGTH: usize = 2048;
885
/// Size limit for `.gitattributes` blobs, in bytes (100 MiB): a larger blob is
/// reported as `gitattributesLarge`.
pub const ATTR_MAX_FILE_SIZE: usize = 100 * 1024 * 1024;
888
/// Returns `true` when a submodule `update` setting starts with `!`, which
/// this module treats as a command-style update value.
fn submodule_update_is_command(value: &str) -> bool {
    value.starts_with('!')
}
893
/// Extracts submodule names from raw `.gitmodules` text without a config
/// parser.
///
/// A line contributes a name when, after trimming, it has the shape
/// `[submodule <name>]`; one pair of surrounding double quotes around the
/// name is removed. Headers with an empty name are ignored. Names are
/// returned in file order, duplicates included.
fn raw_gitmodules_submodule_names(content: &str) -> Vec<String> {
    content
        .lines()
        .filter_map(|line| {
            let header = line.trim().strip_prefix('[')?.strip_suffix(']')?;
            let after_keyword = header.trim().strip_prefix("submodule")?.trim();
            let name = after_keyword
                .strip_prefix('"')
                .and_then(|s| s.strip_suffix('"'))
                .unwrap_or(after_keyword);
            (!name.is_empty()).then(|| name.to_string())
        })
        .collect()
}
919
920pub fn validate_gitmodules_blob_line(data: &[u8]) -> Option<String> {
922 let Ok(text) = std::str::from_utf8(data) else {
923 return None;
924 };
925
926 let mut worst: Option<String> = None;
927
928 if let Ok(config) = ConfigFile::parse(Path::new(".gitmodules"), text, ConfigScope::Local) {
929 for entry in &config.entries {
930 let key = &entry.key;
931 let Some(rest) = key.strip_prefix("submodule.") else {
932 continue;
933 };
934 let Some(last_dot) = rest.rfind('.') else {
935 continue;
936 };
937 let name = &rest[..last_dot];
938 let var = &rest[last_dot + 1..];
939
940 if !check_submodule_name(name) {
941 worst.get_or_insert_with(|| {
942 format!("gitmodulesName: disallowed submodule name: {name}")
943 });
944 }
945
946 let Some(value) = entry.value.as_deref() else {
947 continue;
948 };
949
950 match var {
951 "url" => {
952 if !check_submodule_url(value) {
953 worst.get_or_insert_with(|| {
954 format!("gitmodulesUrl: disallowed submodule url: {value}")
955 });
956 }
957 }
958 "path" => {
959 if looks_like_command_line_option(value) {
960 worst = Some(format!(
961 "gitmodulesPath: disallowed submodule path: {value}"
962 ));
963 }
964 }
965 "update" => {
966 if submodule_update_is_command(value) {
967 worst.get_or_insert_with(|| {
968 format!(
969 "gitmodulesUpdate: disallowed submodule update setting: {value}"
970 )
971 });
972 }
973 }
974 _ => {}
975 }
976 }
977 }
978
979 for name in raw_gitmodules_submodule_names(text) {
984 if !check_submodule_name(&name) {
985 worst.get_or_insert_with(|| {
986 format!("gitmodulesName: disallowed submodule name: {name}")
987 });
988 }
989 }
990
991 worst
992}
993
994fn collect_gitmodules_blobs_from_tree(
995 odb: &Odb,
996 tree_oid: ObjectId,
997 seen_trees: &mut HashSet<ObjectId>,
998) -> Result<HashSet<ObjectId>> {
999 let mut blobs = HashSet::new();
1000 let mut stack = vec![tree_oid];
1001 while let Some(tid) = stack.pop() {
1002 if !seen_trees.insert(tid) {
1003 continue;
1004 }
1005 let obj = odb.read(&tid)?;
1006 if obj.kind != ObjectKind::Tree {
1007 continue;
1008 }
1009 let entries = parse_tree(&obj.data)?;
1010 for TreeEntry { mode, name, oid } in entries {
1011 if tree_entry_is_gitmodules_blob(mode, &name) {
1012 blobs.insert(oid);
1013 } else if mode == 0o040000 {
1014 stack.push(oid);
1015 }
1016 }
1017 }
1018 Ok(blobs)
1019}
1020
1021pub fn verify_gitmodules_for_commit(odb: &Odb, commit_oid: ObjectId) -> Result<Option<String>> {
1023 let obj = odb.read(&commit_oid)?;
1024 if obj.kind != ObjectKind::Commit {
1025 return Ok(None);
1026 }
1027 let commit = parse_commit(&obj.data)?;
1028 let mut seen_trees = HashSet::new();
1029 let blobs = collect_gitmodules_blobs_from_tree(odb, commit.tree, &mut seen_trees)?;
1030 for oid in blobs {
1031 let blob = odb.read(&oid)?;
1032 if blob.kind != ObjectKind::Blob {
1033 continue;
1034 }
1035 if let Some(msg) = validate_gitmodules_blob_line(&blob.data) {
1036 return Ok(Some(format!("{}: {}", oid.to_hex(), msg)));
1037 }
1038 }
1039 Ok(None)
1040}
1041
1042pub fn oids_from_copied_object_paths(copied: &[PathBuf]) -> Result<HashSet<ObjectId>> {
1044 let mut out = HashSet::new();
1045 for p in copied {
1046 let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
1047 continue;
1048 };
1049 if name.ends_with(".idx") {
1050 let idx = read_pack_index(p)?;
1051 for e in &idx.entries {
1052 if e.oid.len() == 20 {
1053 if let Ok(oid) = ObjectId::from_bytes(&e.oid) {
1054 out.insert(oid);
1055 }
1056 }
1057 }
1058 continue;
1059 }
1060 if let Some(oid) = object_id_from_loose_object_path(p) {
1061 out.insert(oid);
1062 }
1063 }
1064 Ok(out)
1065}
1066
1067fn object_id_from_loose_object_path(path: &Path) -> Option<ObjectId> {
1068 let file_name = path.file_name()?.to_str()?;
1069 if file_name.len() != 38 {
1070 return None;
1071 }
1072 let parent = path.parent()?.file_name()?.to_str()?;
1073 if parent.len() != 2 {
1074 return None;
1075 }
1076 let hex = format!("{parent}{file_name}");
1077 ObjectId::from_hex(&hex).ok()
1078}