1use std::collections::HashMap;
2use std::ffi::OsStr;
3use std::io::BufRead;
4use std::sync::atomic::AtomicBool;
5use std::{fs::File, process::Command};
6
7use anyhow::{anyhow, bail, Context as _, Error, Result};
8use camino::{Utf8Path, Utf8PathBuf};
9use clap::{Arg, ArgAction, ArgMatches, ValueHint};
10use rayon::{prelude::*, ThreadPoolBuilder};
11
12pub struct GitCache {
13 cache_base_dir: Utf8PathBuf,
14}
15
16impl GitCache {
17 pub fn new(cache_base_dir: Utf8PathBuf) -> Result<Self, Error> {
18 std::fs::create_dir_all(&cache_base_dir)
19 .with_context(|| format!("creating git cache base directory {cache_base_dir}"))?;
20
21 Ok(Self { cache_base_dir })
22 }
23
24 pub fn cloner(&self) -> GitCacheClonerBuilder {
25 let mut cloner = GitCacheClonerBuilder::default();
26 cloner.cache_base_dir(self.cache_base_dir.clone());
27 cloner
28 }
29}
30
31#[macro_use]
32extern crate derive_builder;
33
/// Configuration of a single clone operation.
///
/// Instances are assembled through the derived `GitCacheClonerBuilder`.
#[derive(Builder)]
pub struct GitCacheCloner {
    /// Base directory of the git cache.
    cache_base_dir: Utf8PathBuf,
    /// URL (or local path) of the repository to clone. Set through the
    /// hand-written `repository_url` setter on the builder.
    #[builder(setter(custom))]
    repository_url: String,
    /// Whether to clone through the cache (`true`) or directly from the
    /// repository (`false`). Defaults to `true` when never set.
    #[builder(default = "true")]
    cached: bool,
    /// Update an already existing cache entry before cloning from it.
    #[builder(default)]
    update: bool,
    /// Destination of the clone; derived from the URL when `None`.
    #[builder(default)]
    target_path: Option<Utf8PathBuf>,
    /// Paths handed to `git sparse-checkout set` after cloning.
    #[builder(default)]
    sparse_paths: Option<Vec<String>>,
    /// Submodule paths to clone; only consulted when
    /// `recurse_all_submodules` is `false`.
    #[builder(default)]
    recurse_submodules: Option<Vec<String>>,
    /// Clone every submodule listed in `.gitmodules`.
    #[builder(default)]
    recurse_all_submodules: bool,
    /// Forwarded unchanged to the cloners created for submodules.
    #[builder(default)]
    shallow_submodules: bool,
    /// Commit to check out in the fresh clone.
    #[builder(default)]
    commit: Option<String>,
    /// Additional arguments inserted into the `git clone` command line.
    #[builder(default)]
    extra_clone_args: Option<Vec<String>>,
    /// Number of threads used for cloning submodules; `1` when `None`.
    #[builder(default)]
    jobs: Option<usize>,
}
60
61impl GitCacheClonerBuilder {
62 pub fn repository_url(&mut self, url: String) -> &mut Self {
63 if self.cached.is_none() {
64 self.cached = Some(!repo_is_local(&url));
65 }
66 self.repository_url = Some(url);
67 self
68 }
69
70 pub fn do_clone(&mut self) -> Result<(), Error> {
71 self.build()
72 .expect("GitCacheCloner builder correctly set up")
73 .do_clone()
74 }
75 pub fn extra_clone_args_from_matches(&mut self, matches: &ArgMatches) -> &mut Self {
76 self.extra_clone_args(Some(get_pass_through_args(matches)))
77 }
78}
79
80fn repo_is_local(url: &str) -> bool {
87 if let Ok(url) = url::Url::parse(url) {
88 url.scheme() == "file"
89 } else {
90 (url.starts_with("./") || url.starts_with('/'))
91 || (!url_is_scp_scheme(url))
92 || std::path::Path::new(url).exists()
93 }
94}
95
/// Returns `true` when `url` looks like an scp-style remote, i.e. its first
/// `@` appears before its first `:` (as in `user@host:path`).
fn url_is_scp_scheme(url: &str) -> bool {
    matches!(
        (url.find('@'), url.find(':')),
        (Some(at_pos), Some(colon_pos)) if at_pos < colon_pos
    )
}
110
111impl GitCacheCloner {
112 fn do_clone(&self) -> Result<(), Error> {
113 let repository = &self.repository_url;
114 let wanted_commit = self.commit.as_ref();
115 let target_path;
116
117 if self.cached {
118 let cache_repo = GitCacheRepo::new(&self.cache_base_dir, &self.repository_url);
119 target_path = cache_repo.target_path(self.target_path.as_ref())?;
120
121 let mut lock = cache_repo.lockfile()?;
122 {
123 let _lock = lock.write()?;
124 if !cache_repo.mirror()? {
125 let try_update =
126 wanted_commit.is_some_and(|commit| !cache_repo.has_commit(commit).unwrap());
127
128 if self.update || try_update {
129 println!("git-cache: updating cache for {repository}...");
130 cache_repo.update()?;
131 }
132
133 if let Some(commit) = wanted_commit {
134 if try_update && !cache_repo.has_commit(commit)? {
135 bail!("git-cache: {repository} does not contain commit {commit}");
136 }
137 }
138 }
139 }
140 {
141 let _lock = lock.read()?;
142 cache_repo.clone(target_path.as_str(), self.extra_clone_args.as_ref())?;
143 }
144 } else {
145 target_path =
146 target_path_from_url_maybe(&self.repository_url, self.target_path.as_ref())?;
147
148 direct_clone(
149 &self.repository_url,
150 target_path.as_str(),
151 self.extra_clone_args.as_ref(),
152 )?;
153 }
154
155 let target_repo = GitRepo {
156 path: target_path.clone(),
157 };
158
159 if let Some(commit) = wanted_commit {
160 target_repo.set_config("advice.detachedHead", "false")?;
161 target_repo.checkout(commit)?;
162 }
163 if let Some(sparse_paths) = self.sparse_paths.as_ref() {
164 target_repo.sparse_checkout(sparse_paths)?;
165 }
166
167 if self.recurse_all_submodules || self.recurse_submodules.is_some() {
168 let filter = if !self.recurse_all_submodules {
169 self.recurse_submodules.clone()
170 } else {
171 None
172 };
173
174 let cache = self.cache()?;
175
176 let jobs = self.jobs.unwrap_or(1);
177
178 static RAYON_CONFIGURED: AtomicBool = AtomicBool::new(false);
179
180 if !RAYON_CONFIGURED.swap(true, std::sync::atomic::Ordering::AcqRel) {
181 let _ = ThreadPoolBuilder::new().num_threads(jobs).build_global();
182 }
183
184 target_repo
185 .get_submodules(filter)?
186 .par_iter()
187 .map(|submodule| {
188 println!(
189 "git-cache: cloning {} into {}...",
190 submodule.url, submodule.path
191 );
192 target_repo.clone_submodule(
193 submodule,
194 &cache,
195 self.shallow_submodules,
196 self.update,
197 )
198 })
199 .collect::<Result<Vec<_>, _>>()?;
200 };
201
202 Ok(())
203 }
204
205 pub fn cache(&self) -> Result<GitCache, anyhow::Error> {
206 GitCache::new(self.cache_base_dir.clone())
207 }
208}
209
/// A git repository identified by its directory; every git command issued
/// for it runs with `git -C <path>`.
pub struct GitRepo {
    /// Directory of the repository.
    path: Utf8PathBuf,
}
213
/// The cache entry (a mirror clone inside the cache directory) belonging to
/// one repository URL.
pub struct GitCacheRepo {
    /// URL of the repository this entry mirrors.
    url: String,
    /// The mirror repository inside the cache directory.
    repo: GitRepo,
}
218
219impl GitRepo {
220 fn git(&self) -> std::process::Command {
221 let mut command = Command::new("git");
222 command.arg("-C").arg(&self.path);
223
224 command
225 }
226
227 fn is_initialized(&self) -> Result<bool> {
228 Ok(self.path.is_dir()
229 && matches!(
230 self.git()
231 .arg("rev-parse")
232 .arg("--git-dir")
233 .output()?
234 .stdout
235 .as_slice(),
236 b".\n" | b".git\n"
237 ))
238 }
239
240 fn has_commit(&self, commit: &str) -> Result<bool> {
241 Ok(self
242 .git()
243 .arg("cat-file")
244 .arg("-e")
245 .arg(format!("{}^{{commit}}", commit))
246 .status()?
247 .success())
248 }
249
250 fn set_config(&self, key: &str, value: &str) -> Result<()> {
251 self.git()
252 .arg("config")
253 .arg(key)
254 .arg(value)
255 .status()?
256 .success()
257 .true_or(anyhow!("cannot set configuration value"))
258 }
259
260 fn checkout(&self, commit: &str) -> Result<()> {
261 self.git()
262 .arg("checkout")
263 .arg(commit)
264 .status()?
265 .success()
266 .true_or(anyhow!("error checking out commit"))
267 }
268
269 fn submodule_commits(&self) -> Result<HashMap<String, String>> {
270 let output = self.git().arg("submodule").arg("status").output()?;
271
272 let res = output
273 .stdout
274 .lines()
275 .map(|line| line.unwrap())
276 .map(|line| {
277 let commit = line[1..41].to_string();
279 let path = line[42..].to_string();
280 (path, commit)
281 })
282 .collect::<HashMap<String, String>>();
283 Ok(res)
284 }
285
286 fn sparse_checkout<I, S>(&self, sparse_paths: I) -> std::result::Result<(), anyhow::Error>
287 where
288 I: IntoIterator<Item = S>,
289 S: AsRef<OsStr>,
290 {
291 self.git()
292 .arg("sparse-checkout")
293 .arg("set")
294 .args(sparse_paths)
295 .status()?
296 .success()
297 .true_or(anyhow!("error setting up sparse checkout"))
298 }
299
300 fn get_submodules(
301 &self,
302 filter: Option<Vec<String>>,
303 ) -> std::result::Result<Vec<SubmoduleSpec>, anyhow::Error> {
304 use gix_config::File;
305 let mut path = self.path.clone();
306 path.push(".gitmodules");
307
308 if !path.exists() {
309 return Ok(Vec::new());
310 }
311
312 let gitconfig = File::from_path_no_includes(path.into(), gix_config::Source::Api)?;
313 let gitmodules = gitconfig.sections_by_name("submodule");
314
315 if gitmodules.is_none() {
316 return Ok(Vec::new());
317 }
318
319 let submodule_commits = self.submodule_commits()?;
320
321 println!("{:?}", submodule_commits);
322
323 let mut submodules = Vec::new();
324 for module in gitmodules.unwrap() {
325 let path = module.body().value("path");
326 let url = module.body().value("url");
327 let branch = module.body().value("branch").map(|b| b.to_string());
328
329 if path.is_none() || url.is_none() {
330 eprintln!("git-cache: submodule missing path or url");
331 continue;
332 }
333 let path = path.unwrap().into_owned().to_string();
334 let url = url.unwrap().into_owned().to_string();
335
336 let commit = submodule_commits.get(&path);
337
338 if commit.is_none() {
339 eprintln!("git-cache: could not find submodule commit for path `{path}`");
340 }
341
342 if let Some(filter) = filter.as_ref() {
343 if !filter.contains(&path) {
344 continue;
345 }
346 }
347
348 submodules.push(SubmoduleSpec::new(
349 path,
350 url,
351 commit.unwrap().clone(),
352 branch,
353 ));
354 }
355
356 Ok(submodules)
357 }
358
359 fn clone_submodule(
360 &self,
361 submodule: &SubmoduleSpec,
362 cache: &GitCache,
363 shallow_submodules: bool,
364 update: bool,
365 ) -> std::result::Result<(), anyhow::Error> {
366 let submodule_path = self.path.join(&submodule.path);
367
368 let mut cloner = cache.cloner();
369
370 cloner
371 .repository_url(submodule.url.clone())
372 .target_path(Some(submodule_path))
373 .recurse_all_submodules(true)
374 .shallow_submodules(shallow_submodules)
375 .commit(Some(submodule.commit.clone()))
376 .update(update);
377
378 cloner.do_clone()?;
383
384 self.init_submodule(&submodule.path)?;
385
386 Ok(())
387 }
388
389 fn init_submodule(&self, path: &str) -> std::result::Result<(), anyhow::Error> {
390 self.git()
391 .arg("submodule")
392 .arg("init")
393 .arg("--")
394 .arg(path)
395 .status()?
396 .success()
397 .true_or(anyhow!("error initializing submodule"))
398 }
399}
400
401impl GitCacheRepo {
402 pub fn new(base_path: &Utf8Path, url: &str) -> Self {
403 let mut path = base_path.to_path_buf();
404 path.push(Self::url_to_slug(url));
405 let cache_path = Utf8PathBuf::from(&path);
406 Self {
407 repo: GitRepo { path: cache_path },
408 url: url.to_string(),
409 }
410 }
411
412 fn mirror(&self) -> Result<bool> {
413 if !self.repo.is_initialized()? {
414 println!("git-cache: cloning {} into cache...", self.url);
415 std::fs::create_dir_all(&self.repo.path)?;
416 Command::new("git")
417 .arg("clone")
418 .arg("--mirror")
419 .arg("--")
420 .arg(&self.url)
421 .arg(&self.repo.path)
422 .status()?
423 .success()
424 .true_or(anyhow!("error mirroring repository"))?;
425
426 Ok(true)
427 } else {
428 Ok(false)
429 }
430 }
431
432 fn update(&self) -> Result<()> {
433 self.repo
434 .git()
435 .arg("remote")
436 .arg("update")
437 .status()?
438 .success()
439 .true_or(anyhow!("error updating repository"))
440 }
441
442 fn url_to_slug(url: &str) -> String {
443 use std::collections::hash_map::DefaultHasher;
444 use std::hash::{Hash, Hasher};
445
446 let mut hasher = DefaultHasher::new();
447 url.hash(&mut hasher);
448 format!("{}.git", hasher.finish())
449 }
450
451 fn clone(&self, target_path: &str, pass_through_args: Option<&Vec<String>>) -> Result<()> {
452 direct_clone(self.repo.path.as_str(), target_path, pass_through_args)?;
453
454 Command::new("git")
455 .arg("-C")
456 .arg(target_path)
457 .arg("remote")
458 .arg("set-url")
459 .arg("origin")
460 .arg(&self.url)
461 .status()?
462 .success()
463 .true_or(anyhow!("error updating remote url"))?;
464 Ok(())
465 }
466
467 pub fn target_path(&self, target_path: Option<&Utf8PathBuf>) -> Result<Utf8PathBuf> {
468 target_path_from_url_maybe(&self.url, target_path)
469 }
470
471 fn has_commit(&self, commit: &str) -> std::result::Result<bool, anyhow::Error> {
476 self.repo.has_commit(commit)
477 }
478
479 fn lockfile(&self) -> Result<fd_lock::RwLock<File>> {
480 let lock_path = self.repo.path.with_extension("lock");
481 Ok(fd_lock::RwLock::new(
482 std::fs::File::create(&lock_path)
483 .with_context(|| format!("creating lock file \"{lock_path}\""))?,
484 ))
485 }
486}
487
488fn direct_clone(
489 repo: &str,
490 target_path: &str,
491 pass_through_args: Option<&Vec<String>>,
492) -> Result<(), Error> {
493 let mut clone_cmd = Command::new("git");
494 clone_cmd.arg("clone").arg("--shared");
495 if let Some(args) = pass_through_args {
496 clone_cmd.args(args);
497 }
498 clone_cmd
499 .arg("--")
500 .arg(repo)
501 .arg(target_path)
502 .status()?
503 .success()
504 .true_or(anyhow!("cloning failed"))?;
505 Ok(())
506}
507
508fn target_path_from_url_maybe(
509 url: &str,
510 target_path: Option<&Utf8PathBuf>,
511) -> Result<Utf8PathBuf, Error> {
512 target_path.map(shellexpand::tilde);
513
514 let url_path = Utf8PathBuf::from(url);
515 let url_path_filename = Utf8PathBuf::from(url_path.file_name().unwrap());
516 let target_path = target_path.unwrap_or(&url_path_filename);
517
518 if !target_path.is_clone_target()? {
519 return Err(anyhow!(
520 "fatal: destination path '{target_path}' already exists and is not an empty directory."
521 ));
522 }
523
524 Ok(target_path.clone())
525}
526
527pub fn clap_git_cache_dir_arg() -> Arg {
528 Arg::new("git_cache_dir")
529 .short('c')
530 .long("cache-dir")
531 .help("git cache base directory")
532 .required(false)
533 .default_value("~/.gitcache")
534 .value_parser(clap::value_parser!(Utf8PathBuf))
535 .value_hint(ValueHint::DirPath)
536 .env("GIT_CACHE_DIR")
537 .num_args(1)
538}
539
/// Builds the clap (sub)command `name` that describes the clone command line.
///
/// Besides the options declared here, the hidden pass-through options from
/// `pass_through_args()` are registered on the command.
pub fn clap_clone_command(name: &'static str) -> clap::Command {
    // Local import: within this function `Command` refers to clap's type.
    use clap::Command;
    Command::new(name)
        .about("clone repository")
        // Positional arguments: the repository and an optional target path.
        .arg(
            Arg::new("repository")
                .help("repository to clone")
                .required(true),
        )
        .arg(
            Arg::new("target_path")
                .help("target path")
                .required(false)
                .value_parser(clap::value_parser!(Utf8PathBuf))
                .value_hint(ValueHint::DirPath),
        )
        .arg(
            Arg::new("update")
                .short('U')
                .long("update")
                .action(ArgAction::SetTrue)
                .help("force update of cached repo"),
        )
        // `--commit` and `--sparse-add` are both declared as conflicting with
        // the pass-through `branch` option.
        .arg(
            Arg::new("commit")
                .long("commit")
                .value_name("HASH")
                .conflicts_with("branch")
                .help("check out specific commit"),
        )
        .arg(
            Arg::new("sparse-add")
                .long("sparse-add")
                .value_name("PATH")
                .conflicts_with("branch")
                .action(ArgAction::Append)
                .help("do a sparse checkout, keep PATH"),
        )
        // May be given without a value, or as `--recurse-submodules=<pathspec>`
        // (the `=` is mandatory), and may be repeated.
        .arg(
            Arg::new("recurse-submodules")
                .long("recurse-submodules")
                .value_name("pathspec")
                .action(ArgAction::Append)
                .num_args(0..=1)
                .require_equals(true)
                .help("recursively clone submodules"),
        )
        // The two shallow-submodule switches override each other.
        .arg(
            Arg::new("shallow-submodules")
                .long("shallow-submodules")
                .action(ArgAction::SetTrue)
                .overrides_with("no-shallow-submodules")
                .help("shallow-clone submodules"),
        )
        .arg(
            Arg::new("no-shallow-submodules")
                .long("no-shallow-submodules")
                .action(ArgAction::SetTrue)
                .overrides_with("shallow-submodules")
                .help("don't shallow-clone submodules"),
        )
        .arg(
            Arg::new("jobs")
                .long("jobs")
                .short('j')
                .help("The number of submodules fetched at the same time.")
                .num_args(1)
                .value_parser(clap::value_parser!(usize)),
        )
        // Hidden options that mirror regular `git clone` options.
        .args(pass_through_args())
        .after_help(
            "These regular \"git clone\" options are passed through:\n
 [--template=<template-directory>]
 [-l] [-s] [--no-hardlinks] [-q] [-n] [--bare] [--mirror]
 [-o <name>] [-b <name>] [-u <upload-pack>] [--reference <repository>]
 [--dissociate] [--separate-git-dir <git-dir>]
 [--depth <depth>] [--[no-]single-branch] [--no-tags]
 [--recurse-submodules[=<pathspec>]] [--[no-]shallow-submodules]
 [--[no-]remote-submodules] [--jobs <n>] [--sparse] [--[no-]reject-shallow]
 [--filter=<filter> [--also-filter-submodules]]",
        )
}
622
623fn pass_through_args() -> Vec<Arg> {
624 let mut args = Vec::new();
625
626 for (short, long) in [
628 ('l', "local"),
629 ('q', "quiet"),
631 ('s', "shared"),
632 ('v', "verbose"),
633 ]
634 .into_iter()
635 {
636 args.push(
637 Arg::new(long)
638 .short(short)
639 .long(long)
640 .hide(true)
641 .action(ArgAction::SetTrue),
642 );
643 }
644
645 args.push(
647 Arg::new("no-checkout")
648 .short('n')
649 .long("no-checkout")
650 .hide(true)
651 .num_args(0)
652 .default_value_if("commit", clap::builder::ArgPredicate::IsPresent, "true"),
653 );
654
655 args.push(
656 Arg::new("sparse")
657 .long("sparse")
658 .hide(true)
659 .num_args(0)
660 .default_value_if("sparse-add", clap::builder::ArgPredicate::IsPresent, "true"),
661 );
662
663 for (short, long) in [
665 ('b', "branch"),
666 ('c', "config"),
667 ('o', "origin"),
668 ('u', "upload-pack"),
669 ]
670 .into_iter()
671 {
672 args.push(
673 Arg::new(long)
674 .short(short)
675 .long(long)
676 .num_args(1)
677 .hide(true),
678 );
679 }
680
681 for id in [
683 "also-filter-submodules",
684 "bare",
685 "dissociate",
686 "mirror",
687 "no-hardlinks",
688 "no-reject-shallow",
689 "no-remote-submodules",
690 "no-single-branch",
691 "no-tags",
692 "reject-shallow",
693 "remote-submodules",
694 "single-branch",
695 ]
696 .into_iter()
697 {
698 args.push(Arg::new(id).long(id).action(ArgAction::SetTrue).hide(true));
699 }
700
701 for id in [
703 "bundle-uri",
704 "depth",
705 "filter",
706 "reference",
707 "reference-if-able",
708 "separate-git-dir",
709 "shallow-exclude",
710 "shallow-since",
711 "template",
712 ]
713 .into_iter()
714 {
715 args.push(Arg::new(id).long(id).num_args(1).hide(true));
716 }
717
718 args
719}
720
721fn get_pass_through_args(matches: &ArgMatches) -> Vec<String> {
722 let mut args = Vec::new();
723 for id in [
725 "local",
726 "no-checkout",
727 "quiet",
728 "shared",
729 "verbose",
730 "also-filter-submodules",
731 "bare",
732 "dissociate",
733 "mirror",
734 "no-hardlinks",
735 "no-reject-shallow",
736 "no-remote-submodules",
737 "no-single-branch",
738 "no-tags",
739 "reject-shallow",
740 "remote-submodules",
741 "single-branch",
742 "sparse",
743 ]
744 .into_iter()
745 {
746 if matches.get_flag(id) {
747 args.push(format!("--{id}"));
748 }
749 }
750
751 for id in [
753 "branch",
754 "bundle-uri",
755 "config",
756 "depth",
757 "filter",
758 "origin",
759 "reference",
760 "reference-if-able",
761 "separate-git-dir",
762 "shallow-exclude",
763 "shallow-since",
764 "template",
765 "upload-pack",
766 ]
767 .into_iter()
768 {
769 if let Some(occurrences) = matches.get_occurrences::<String>(id) {
770 for occurrence in occurrences.flatten() {
771 args.push(format!("--{id}"));
772 args.push(occurrence.clone());
773 }
774 }
775 }
776
777 args
778}
779
780trait CanCloneInto {
781 fn is_clone_target(&self) -> Result<bool, Error>;
782}
783
784impl CanCloneInto for camino::Utf8Path {
785 fn is_clone_target(&self) -> Result<bool, Error> {
786 Ok((!self.exists()) || (self.is_dir() && { self.read_dir()?.next().is_none() }))
787 }
788}
789
790trait TrueOr {
791 fn true_or(self, error: Error) -> Result<()>;
792}
793
794impl TrueOr for bool {
795 fn true_or(self, error: Error) -> Result<()> {
796 if self {
797 Ok(())
798 } else {
799 Err(error)
800 }
801 }
802}
803
/// One submodule of a repository: where it lives, where it comes from and
/// which commit it is recorded at.
#[derive(Debug, Clone)]
struct SubmoduleSpec {
    /// Path of the submodule, as configured in `.gitmodules`.
    path: String,
    /// URL the submodule is cloned from.
    url: String,
    /// Branch configured for the submodule, if any.
    #[allow(dead_code)]
    branch: Option<String>,
    /// Commit the submodule is recorded at.
    commit: String,
}

impl SubmoduleSpec {
    /// Bundles the given values into a `SubmoduleSpec`.
    ///
    /// Note the argument order: `commit` comes before `branch`.
    pub fn new(path: String, url: String, commit: String, branch: Option<String>) -> Self {
        SubmoduleSpec {
            path,
            url,
            branch,
            commit,
        }
    }
}