git_cache/
lib.rs

1use std::collections::HashMap;
2use std::ffi::OsStr;
3use std::io::BufRead;
4use std::sync::atomic::AtomicBool;
5use std::{fs::File, process::Command};
6
7use anyhow::{anyhow, bail, Context as _, Error, Result};
8use camino::{Utf8Path, Utf8PathBuf};
9use clap::{Arg, ArgAction, ArgMatches, ValueHint};
10use rayon::{prelude::*, ThreadPoolBuilder};
11
12pub struct GitCache {
13    cache_base_dir: Utf8PathBuf,
14}
15
16impl GitCache {
17    pub fn new(cache_base_dir: Utf8PathBuf) -> Result<Self, Error> {
18        std::fs::create_dir_all(&cache_base_dir)
19            .with_context(|| format!("creating git cache base directory {cache_base_dir}"))?;
20
21        Ok(Self { cache_base_dir })
22    }
23
24    pub fn cloner(&self) -> GitCacheClonerBuilder {
25        let mut cloner = GitCacheClonerBuilder::default();
26        cloner.cache_base_dir(self.cache_base_dir.clone());
27        cloner
28    }
29}
30
31#[macro_use]
32extern crate derive_builder;
33
/// Settings for one clone operation, optionally served from the local cache.
///
/// Values are assembled through the `GitCacheClonerBuilder` generated by
/// `derive_builder`.
#[derive(Builder)]
pub struct GitCacheCloner {
    /// Base directory of the git cache.
    cache_base_dir: Utf8PathBuf,
    /// URL (or local path) of the repository to clone. Uses a hand-written
    /// setter on the builder.
    #[builder(setter(custom))]
    repository_url: String,
    /// Whether to clone via the cache; when `false` the repository is cloned
    /// directly. Builder default is `true`.
    #[builder(default = "true")]
    cached: bool,
    /// Update an already existing cache entry before cloning from it.
    #[builder(default)]
    update: bool,
    /// Destination of the clone; derived from the repository URL when `None`.
    #[builder(default)]
    target_path: Option<Utf8PathBuf>,
    /// Paths handed to `git sparse-checkout set` after cloning.
    #[builder(default)]
    sparse_paths: Option<Vec<String>>,
    /// Submodule paths to clone; only consulted when
    /// `recurse_all_submodules` is `false`.
    #[builder(default)]
    recurse_submodules: Option<Vec<String>>,
    /// Clone every submodule, ignoring the `recurse_submodules` filter.
    #[builder(default)]
    recurse_all_submodules: bool,
    /// Forwarded to the cloners created for submodules.
    #[builder(default)]
    shallow_submodules: bool,
    /// Commit to check out once the clone exists.
    #[builder(default)]
    commit: Option<String>,
    /// Extra arguments passed to `git clone`.
    #[builder(default)]
    extra_clone_args: Option<Vec<String>>,
    /// Number of threads used for cloning submodules; `1` when unset.
    #[builder(default)]
    jobs: Option<usize>,
}
60
61impl GitCacheClonerBuilder {
62    pub fn repository_url(&mut self, url: String) -> &mut Self {
63        if self.cached.is_none() {
64            self.cached = Some(!repo_is_local(&url));
65        }
66        self.repository_url = Some(url);
67        self
68    }
69
70    pub fn do_clone(&mut self) -> Result<(), Error> {
71        self.build()
72            .expect("GitCacheCloner builder correctly set up")
73            .do_clone()
74    }
75    pub fn extra_clone_args_from_matches(&mut self, matches: &ArgMatches) -> &mut Self {
76        self.extra_clone_args(Some(get_pass_through_args(matches)))
77    }
78}
79
80/// returns `true` if the git repo url points to a local path
81///
82/// This function tries to mimic Git's notion of a local repository.
83///
84/// Some things to watch out for:
85/// - this does not take bundles into account
86fn repo_is_local(url: &str) -> bool {
87    if let Ok(url) = url::Url::parse(url) {
88        url.scheme() == "file"
89    } else {
90        (url.starts_with("./") || url.starts_with('/'))
91            || (!url_is_scp_scheme(url))
92            || std::path::Path::new(url).exists()
93    }
94}
95
/// Returns `true` when `url` looks like an scp-style address
/// (`user@host:path`): it contains both an `@` and a `:`, and the first `@`
/// comes before the first `:`.
fn url_is_scp_scheme(url: &str) -> bool {
    match (url.find('@'), url.find(':')) {
        (Some(at_pos), Some(colon_pos)) => at_pos < colon_pos,
        _ => false,
    }
}
110
111impl GitCacheCloner {
112    fn do_clone(&self) -> Result<(), Error> {
113        let repository = &self.repository_url;
114        let wanted_commit = self.commit.as_ref();
115        let target_path;
116
117        if self.cached {
118            let cache_repo = GitCacheRepo::new(&self.cache_base_dir, &self.repository_url);
119            target_path = cache_repo.target_path(self.target_path.as_ref())?;
120
121            let mut lock = cache_repo.lockfile()?;
122            {
123                let _lock = lock.write()?;
124                if !cache_repo.mirror()? {
125                    let try_update =
126                        wanted_commit.is_some_and(|commit| !cache_repo.has_commit(commit).unwrap());
127
128                    if self.update || try_update {
129                        println!("git-cache: updating cache for {repository}...");
130                        cache_repo.update()?;
131                    }
132
133                    if let Some(commit) = wanted_commit {
134                        if try_update && !cache_repo.has_commit(commit)? {
135                            bail!("git-cache: {repository} does not contain commit {commit}");
136                        }
137                    }
138                }
139            }
140            {
141                let _lock = lock.read()?;
142                cache_repo.clone(target_path.as_str(), self.extra_clone_args.as_ref())?;
143            }
144        } else {
145            target_path =
146                target_path_from_url_maybe(&self.repository_url, self.target_path.as_ref())?;
147
148            direct_clone(
149                &self.repository_url,
150                target_path.as_str(),
151                self.extra_clone_args.as_ref(),
152            )?;
153        }
154
155        let target_repo = GitRepo {
156            path: target_path.clone(),
157        };
158
159        if let Some(commit) = wanted_commit {
160            target_repo.set_config("advice.detachedHead", "false")?;
161            target_repo.checkout(commit)?;
162        }
163        if let Some(sparse_paths) = self.sparse_paths.as_ref() {
164            target_repo.sparse_checkout(sparse_paths)?;
165        }
166
167        if self.recurse_all_submodules || self.recurse_submodules.is_some() {
168            let filter = if !self.recurse_all_submodules {
169                self.recurse_submodules.clone()
170            } else {
171                None
172            };
173
174            let cache = self.cache()?;
175
176            let jobs = self.jobs.unwrap_or(1);
177
178            static RAYON_CONFIGURED: AtomicBool = AtomicBool::new(false);
179
180            if !RAYON_CONFIGURED.swap(true, std::sync::atomic::Ordering::AcqRel) {
181                let _ = ThreadPoolBuilder::new().num_threads(jobs).build_global();
182            }
183
184            target_repo
185                .get_submodules(filter)?
186                .par_iter()
187                .map(|submodule| {
188                    println!(
189                        "git-cache: cloning {} into {}...",
190                        submodule.url, submodule.path
191                    );
192                    target_repo.clone_submodule(
193                        submodule,
194                        &cache,
195                        self.shallow_submodules,
196                        self.update,
197                    )
198                })
199                .collect::<Result<Vec<_>, _>>()?;
200        };
201
202        Ok(())
203    }
204
205    pub fn cache(&self) -> Result<GitCache, anyhow::Error> {
206        GitCache::new(self.cache_base_dir.clone())
207    }
208}
209
/// A git repository on disk, addressed by its path; git commands for it are
/// run with `git -C <path>`.
pub struct GitRepo {
    /// Location of the repository.
    path: Utf8PathBuf,
}
213
/// A cache entry: the mirror of one upstream repository inside the cache
/// base directory.
pub struct GitCacheRepo {
    /// Upstream URL this entry mirrors.
    url: String,
    /// The mirror repository inside the cache.
    repo: GitRepo,
}
218
219impl GitRepo {
220    fn git(&self) -> std::process::Command {
221        let mut command = Command::new("git");
222        command.arg("-C").arg(&self.path);
223
224        command
225    }
226
227    fn is_initialized(&self) -> Result<bool> {
228        Ok(self.path.is_dir()
229            && matches!(
230                self.git()
231                    .arg("rev-parse")
232                    .arg("--git-dir")
233                    .output()?
234                    .stdout
235                    .as_slice(),
236                b".\n" | b".git\n"
237            ))
238    }
239
240    fn has_commit(&self, commit: &str) -> Result<bool> {
241        Ok(self
242            .git()
243            .arg("cat-file")
244            .arg("-e")
245            .arg(format!("{}^{{commit}}", commit))
246            .status()?
247            .success())
248    }
249
250    fn set_config(&self, key: &str, value: &str) -> Result<()> {
251        self.git()
252            .arg("config")
253            .arg(key)
254            .arg(value)
255            .status()?
256            .success()
257            .true_or(anyhow!("cannot set configuration value"))
258    }
259
260    fn checkout(&self, commit: &str) -> Result<()> {
261        self.git()
262            .arg("checkout")
263            .arg(commit)
264            .status()?
265            .success()
266            .true_or(anyhow!("error checking out commit"))
267    }
268
269    fn submodule_commits(&self) -> Result<HashMap<String, String>> {
270        let output = self.git().arg("submodule").arg("status").output()?;
271
272        let res = output
273            .stdout
274            .lines()
275            .map(|line| line.unwrap())
276            .map(|line| {
277                // ` f47ce7b5fbbb3aa43d33d2be1f6cd3746b13d5bf some/path`
278                let commit = line[1..41].to_string();
279                let path = line[42..].to_string();
280                (path, commit)
281            })
282            .collect::<HashMap<String, String>>();
283        Ok(res)
284    }
285
286    fn sparse_checkout<I, S>(&self, sparse_paths: I) -> std::result::Result<(), anyhow::Error>
287    where
288        I: IntoIterator<Item = S>,
289        S: AsRef<OsStr>,
290    {
291        self.git()
292            .arg("sparse-checkout")
293            .arg("set")
294            .args(sparse_paths)
295            .status()?
296            .success()
297            .true_or(anyhow!("error setting up sparse checkout"))
298    }
299
300    fn get_submodules(
301        &self,
302        filter: Option<Vec<String>>,
303    ) -> std::result::Result<Vec<SubmoduleSpec>, anyhow::Error> {
304        use gix_config::File;
305        let mut path = self.path.clone();
306        path.push(".gitmodules");
307
308        if !path.exists() {
309            return Ok(Vec::new());
310        }
311
312        let gitconfig = File::from_path_no_includes(path.into(), gix_config::Source::Api)?;
313        let gitmodules = gitconfig.sections_by_name("submodule");
314
315        if gitmodules.is_none() {
316            return Ok(Vec::new());
317        }
318
319        let submodule_commits = self.submodule_commits()?;
320
321        println!("{:?}", submodule_commits);
322
323        let mut submodules = Vec::new();
324        for module in gitmodules.unwrap() {
325            let path = module.body().value("path");
326            let url = module.body().value("url");
327            let branch = module.body().value("branch").map(|b| b.to_string());
328
329            if path.is_none() || url.is_none() {
330                eprintln!("git-cache: submodule missing path or url");
331                continue;
332            }
333            let path = path.unwrap().into_owned().to_string();
334            let url = url.unwrap().into_owned().to_string();
335
336            let commit = submodule_commits.get(&path);
337
338            if commit.is_none() {
339                eprintln!("git-cache: could not find submodule commit for path `{path}`");
340            }
341
342            if let Some(filter) = filter.as_ref() {
343                if !filter.contains(&path) {
344                    continue;
345                }
346            }
347
348            submodules.push(SubmoduleSpec::new(
349                path,
350                url,
351                commit.unwrap().clone(),
352                branch,
353            ));
354        }
355
356        Ok(submodules)
357    }
358
359    fn clone_submodule(
360        &self,
361        submodule: &SubmoduleSpec,
362        cache: &GitCache,
363        shallow_submodules: bool,
364        update: bool,
365    ) -> std::result::Result<(), anyhow::Error> {
366        let submodule_path = self.path.join(&submodule.path);
367
368        let mut cloner = cache.cloner();
369
370        cloner
371            .repository_url(submodule.url.clone())
372            .target_path(Some(submodule_path))
373            .recurse_all_submodules(true)
374            .shallow_submodules(shallow_submodules)
375            .commit(Some(submodule.commit.clone()))
376            .update(update);
377
378        // if let Some(branch) = submodule.branch {
379        //     cloner.extra_clone_args(Some(vec!["--branch".into(), branch]));
380        // }
381
382        cloner.do_clone()?;
383
384        self.init_submodule(&submodule.path)?;
385
386        Ok(())
387    }
388
389    fn init_submodule(&self, path: &str) -> std::result::Result<(), anyhow::Error> {
390        self.git()
391            .arg("submodule")
392            .arg("init")
393            .arg("--")
394            .arg(path)
395            .status()?
396            .success()
397            .true_or(anyhow!("error initializing submodule"))
398    }
399}
400
401impl GitCacheRepo {
402    pub fn new(base_path: &Utf8Path, url: &str) -> Self {
403        let mut path = base_path.to_path_buf();
404        path.push(Self::url_to_slug(url));
405        let cache_path = Utf8PathBuf::from(&path);
406        Self {
407            repo: GitRepo { path: cache_path },
408            url: url.to_string(),
409        }
410    }
411
412    fn mirror(&self) -> Result<bool> {
413        if !self.repo.is_initialized()? {
414            println!("git-cache: cloning {} into cache...", self.url);
415            std::fs::create_dir_all(&self.repo.path)?;
416            Command::new("git")
417                .arg("clone")
418                .arg("--mirror")
419                .arg("--")
420                .arg(&self.url)
421                .arg(&self.repo.path)
422                .status()?
423                .success()
424                .true_or(anyhow!("error mirroring repository"))?;
425
426            Ok(true)
427        } else {
428            Ok(false)
429        }
430    }
431
432    fn update(&self) -> Result<()> {
433        self.repo
434            .git()
435            .arg("remote")
436            .arg("update")
437            .status()?
438            .success()
439            .true_or(anyhow!("error updating repository"))
440    }
441
442    fn url_to_slug(url: &str) -> String {
443        use std::collections::hash_map::DefaultHasher;
444        use std::hash::{Hash, Hasher};
445
446        let mut hasher = DefaultHasher::new();
447        url.hash(&mut hasher);
448        format!("{}.git", hasher.finish())
449    }
450
451    fn clone(&self, target_path: &str, pass_through_args: Option<&Vec<String>>) -> Result<()> {
452        direct_clone(self.repo.path.as_str(), target_path, pass_through_args)?;
453
454        Command::new("git")
455            .arg("-C")
456            .arg(target_path)
457            .arg("remote")
458            .arg("set-url")
459            .arg("origin")
460            .arg(&self.url)
461            .status()?
462            .success()
463            .true_or(anyhow!("error updating remote url"))?;
464        Ok(())
465    }
466
467    pub fn target_path(&self, target_path: Option<&Utf8PathBuf>) -> Result<Utf8PathBuf> {
468        target_path_from_url_maybe(&self.url, target_path)
469    }
470
471    // fn is_initialized(&self) -> std::result::Result<bool, anyhow::Error> {
472    //     self.repo.is_initialized()
473    // }
474
475    fn has_commit(&self, commit: &str) -> std::result::Result<bool, anyhow::Error> {
476        self.repo.has_commit(commit)
477    }
478
479    fn lockfile(&self) -> Result<fd_lock::RwLock<File>> {
480        let lock_path = self.repo.path.with_extension("lock");
481        Ok(fd_lock::RwLock::new(
482            std::fs::File::create(&lock_path)
483                .with_context(|| format!("creating lock file \"{lock_path}\""))?,
484        ))
485    }
486}
487
488fn direct_clone(
489    repo: &str,
490    target_path: &str,
491    pass_through_args: Option<&Vec<String>>,
492) -> Result<(), Error> {
493    let mut clone_cmd = Command::new("git");
494    clone_cmd.arg("clone").arg("--shared");
495    if let Some(args) = pass_through_args {
496        clone_cmd.args(args);
497    }
498    clone_cmd
499        .arg("--")
500        .arg(repo)
501        .arg(target_path)
502        .status()?
503        .success()
504        .true_or(anyhow!("cloning failed"))?;
505    Ok(())
506}
507
508fn target_path_from_url_maybe(
509    url: &str,
510    target_path: Option<&Utf8PathBuf>,
511) -> Result<Utf8PathBuf, Error> {
512    target_path.map(shellexpand::tilde);
513
514    let url_path = Utf8PathBuf::from(url);
515    let url_path_filename = Utf8PathBuf::from(url_path.file_name().unwrap());
516    let target_path = target_path.unwrap_or(&url_path_filename);
517
518    if !target_path.is_clone_target()? {
519        return Err(anyhow!(
520            "fatal: destination path '{target_path}' already exists and is not an empty directory."
521        ));
522    }
523
524    Ok(target_path.clone())
525}
526
527pub fn clap_git_cache_dir_arg() -> Arg {
528    Arg::new("git_cache_dir")
529        .short('c')
530        .long("cache-dir")
531        .help("git cache base directory")
532        .required(false)
533        .default_value("~/.gitcache")
534        .value_parser(clap::value_parser!(Utf8PathBuf))
535        .value_hint(ValueHint::DirPath)
536        .env("GIT_CACHE_DIR")
537        .num_args(1)
538}
539
/// Builds the clap sub-command `name` for cloning a repository.
///
/// Besides the options handled here (repository, target path, update,
/// commit, sparse paths, submodule handling and job count), the hidden
/// arguments from `pass_through_args` are registered as well.
pub fn clap_clone_command(name: &'static str) -> clap::Command {
    use clap::Command;
    Command::new(name)
        .about("clone repository")
        // positional: repository url (mandatory)
        .arg(
            Arg::new("repository")
                .help("repository to clone")
                .required(true),
        )
        // positional: clone destination (optional)
        .arg(
            Arg::new("target_path")
                .help("target path")
                .required(false)
                .value_parser(clap::value_parser!(Utf8PathBuf))
                .value_hint(ValueHint::DirPath),
        )
        .arg(
            Arg::new("update")
                .short('U')
                .long("update")
                .action(ArgAction::SetTrue)
                .help("force update of cached repo"),
        )
        .arg(
            Arg::new("commit")
                .long("commit")
                .value_name("HASH")
                .conflicts_with("branch")
                .help("check out specific commit"),
        )
        // may be given multiple times, one PATH per occurrence
        .arg(
            Arg::new("sparse-add")
                .long("sparse-add")
                .value_name("PATH")
                .conflicts_with("branch")
                .action(ArgAction::Append)
                .help("do a sparse checkout, keep PATH"),
        )
        // takes an optional value that must be attached with `=`
        .arg(
            Arg::new("recurse-submodules")
                .long("recurse-submodules")
                .value_name("pathspec")
                .action(ArgAction::Append)
                .num_args(0..=1)
                .require_equals(true)
                .help("recursively clone submodules"),
        )
        // the two shallow-submodules flags override each other
        .arg(
            Arg::new("shallow-submodules")
                .long("shallow-submodules")
                .action(ArgAction::SetTrue)
                .overrides_with("no-shallow-submodules")
                .help("shallow-clone submodules"),
        )
        .arg(
            Arg::new("no-shallow-submodules")
                .long("no-shallow-submodules")
                .action(ArgAction::SetTrue)
                .overrides_with("shallow-submodules")
                .help("don't shallow-clone submodules"),
        )
        .arg(
            Arg::new("jobs")
                .long("jobs")
                .short('j')
                .help("The number of submodules fetched at the same time.")
                .num_args(1)
                .value_parser(clap::value_parser!(usize)),
        )
        .args(pass_through_args())
        .after_help(
            "These regular \"git clone\" options are passed through:\n
        [--template=<template-directory>]
        [-l] [-s] [--no-hardlinks] [-q] [-n] [--bare] [--mirror]
        [-o <name>] [-b <name>] [-u <upload-pack>] [--reference <repository>]
        [--dissociate] [--separate-git-dir <git-dir>]
        [--depth <depth>] [--[no-]single-branch] [--no-tags]
        [--recurse-submodules[=<pathspec>]] [--[no-]shallow-submodules]
        [--[no-]remote-submodules] [--jobs <n>] [--sparse] [--[no-]reject-shallow]
        [--filter=<filter> [--also-filter-submodules]]",
        )
}
622
623fn pass_through_args() -> Vec<Arg> {
624    let mut args = Vec::new();
625
626    // short w/o arg
627    for (short, long) in [
628        ('l', "local"),
629        //        ('n', "no-checkout"),
630        ('q', "quiet"),
631        ('s', "shared"),
632        ('v', "verbose"),
633    ]
634    .into_iter()
635    {
636        args.push(
637            Arg::new(long)
638                .short(short)
639                .long(long)
640                .hide(true)
641                .action(ArgAction::SetTrue),
642        );
643    }
644
645    //
646    args.push(
647        Arg::new("no-checkout")
648            .short('n')
649            .long("no-checkout")
650            .hide(true)
651            .num_args(0)
652            .default_value_if("commit", clap::builder::ArgPredicate::IsPresent, "true"),
653    );
654
655    args.push(
656        Arg::new("sparse")
657            .long("sparse")
658            .hide(true)
659            .num_args(0)
660            .default_value_if("sparse-add", clap::builder::ArgPredicate::IsPresent, "true"),
661    );
662
663    // short with arg
664    for (short, long) in [
665        ('b', "branch"),
666        ('c', "config"),
667        ('o', "origin"),
668        ('u', "upload-pack"),
669    ]
670    .into_iter()
671    {
672        args.push(
673            Arg::new(long)
674                .short(short)
675                .long(long)
676                .num_args(1)
677                .hide(true),
678        );
679    }
680
681    // long w/o arg
682    for id in [
683        "also-filter-submodules",
684        "bare",
685        "dissociate",
686        "mirror",
687        "no-hardlinks",
688        "no-reject-shallow",
689        "no-remote-submodules",
690        "no-single-branch",
691        "no-tags",
692        "reject-shallow",
693        "remote-submodules",
694        "single-branch",
695    ]
696    .into_iter()
697    {
698        args.push(Arg::new(id).long(id).action(ArgAction::SetTrue).hide(true));
699    }
700
701    // long with arg always
702    for id in [
703        "bundle-uri",
704        "depth",
705        "filter",
706        "reference",
707        "reference-if-able",
708        "separate-git-dir",
709        "shallow-exclude",
710        "shallow-since",
711        "template",
712    ]
713    .into_iter()
714    {
715        args.push(Arg::new(id).long(id).num_args(1).hide(true));
716    }
717
718    args
719}
720
721fn get_pass_through_args(matches: &ArgMatches) -> Vec<String> {
722    let mut args = Vec::new();
723    // w/o arg
724    for id in [
725        "local",
726        "no-checkout",
727        "quiet",
728        "shared",
729        "verbose",
730        "also-filter-submodules",
731        "bare",
732        "dissociate",
733        "mirror",
734        "no-hardlinks",
735        "no-reject-shallow",
736        "no-remote-submodules",
737        "no-single-branch",
738        "no-tags",
739        "reject-shallow",
740        "remote-submodules",
741        "single-branch",
742        "sparse",
743    ]
744    .into_iter()
745    {
746        if matches.get_flag(id) {
747            args.push(format!("--{id}"));
748        }
749    }
750
751    // with arg always
752    for id in [
753        "branch",
754        "bundle-uri",
755        "config",
756        "depth",
757        "filter",
758        "origin",
759        "reference",
760        "reference-if-able",
761        "separate-git-dir",
762        "shallow-exclude",
763        "shallow-since",
764        "template",
765        "upload-pack",
766    ]
767    .into_iter()
768    {
769        if let Some(occurrences) = matches.get_occurrences::<String>(id) {
770            for occurrence in occurrences.flatten() {
771                args.push(format!("--{id}"));
772                args.push(occurrence.clone());
773            }
774        }
775    }
776
777    args
778}
779
780trait CanCloneInto {
781    fn is_clone_target(&self) -> Result<bool, Error>;
782}
783
784impl CanCloneInto for camino::Utf8Path {
785    fn is_clone_target(&self) -> Result<bool, Error> {
786        Ok((!self.exists()) || (self.is_dir() && { self.read_dir()?.next().is_none() }))
787    }
788}
789
790trait TrueOr {
791    fn true_or(self, error: Error) -> Result<()>;
792}
793
794impl TrueOr for bool {
795    fn true_or(self, error: Error) -> Result<()> {
796        if self {
797            Ok(())
798        } else {
799            Err(error)
800        }
801    }
802}
803
/// Description of one submodule: where it lives in the superproject, where
/// it is cloned from and which commit is wanted.
#[derive(Debug, Clone)]
struct SubmoduleSpec {
    /// Path of the submodule relative to the superproject.
    path: String,
    /// URL the submodule is cloned from.
    url: String,
    /// Branch configured for the submodule, if any; stored but not read yet.
    #[allow(dead_code)]
    branch: Option<String>,
    /// Commit to check out in the submodule.
    commit: String,
}

impl SubmoduleSpec {
    /// Bundles the given values into a `SubmoduleSpec`.
    pub fn new(path: String, url: String, commit: String, branch: Option<String>) -> Self {
        let spec = Self {
            branch,
            commit,
            url,
            path,
        };
        spec
    }
}