Skip to main content

gkit_core/
clone.rs

1//! Config-driven clone with explicit flag placement, built-in stateless steps, and
2//! pre/post-clone hooks.
3//!
4//! Per repo, in order: global `pre-clone` → repo `pre-clone` → `git <PRE> clone
5//! <POST> <url> <dir>` → built-ins (git identity + submodule branch-switch +
6//! `direnv allow`) → global `post-clone` → repo `post-clone`.
7//!
8//! Git identity (`user.name`/`user.email`) is **per-invocation, not in the conf**
9//! (the conf is shared across a team): it comes from `Opts` (the `clone`
10//! `--user-name`/`--user-email` flags, or an interactive prompt), and is stamped
11//! via `git config` on each cloned repo right after the clone, so `post-clone` hooks see it.
12//!
13//! The `git clone` and built-ins are **captured** (clean status; an `.envrc` that
14//! runs `glow …` can't distort output — `direnv allow` only records trust). User
15//! hooks run via `sh -c` with their output **inherited** (explicit commands, shown
16//! live) and `$GKIT_REPO`/`GKIT_DIR`/`GKIT_URL`/`GKIT_HOST`/`GKIT_NAMESPACE` set
17//! (plus `GKIT_USER_NAME`/`GKIT_USER_EMAIL`, empty when no identity was given).
18
19use crate::conf::{expand_path, CloneConf, Repo};
20use crate::git::Git;
21use std::path::{Path, PathBuf};
22use std::process::Command;
23
/// Final result of processing one repo in `clone_all`.
///
/// `Clone` is derived so an outcome can be copied out of a report (e.g. to
/// aggregate or store it) without destructuring.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Outcome {
    /// The clone, the built-in steps and all hooks completed.
    Cloned,
    /// The target directory already contained a `.git`; nothing was run.
    Skipped,
    /// A step failed; the payload is the error message for that step.
    Failed(String),
}
30
31#[derive(Debug)]
32pub struct CloneReport {
33    pub name: String,
34    pub dir: PathBuf,
35    pub outcome: Outcome,
36    pub command: String,
37}
38
/// Per-invocation options for `clone_all` (as opposed to the shared conf file).
///
/// `Debug` and `Clone` are derived so the options can be logged and reused
/// across invocations.
#[derive(Debug, Clone)]
pub struct Opts {
    /// Run the recursive submodule branch-switch built-in after the clone.
    pub submodule_branch: bool,
    /// Run `direnv allow` on the cloned repo when it contains an `.envrc`.
    pub direnv: bool,
    /// Git identity stamped on each cloned repo (`git config user.name`). Per
    /// invocation, not from the conf — `None` leaves the repo's inherited identity.
    pub user_name: Option<String>,
    /// Git identity stamped on each cloned repo (`git config user.email`).
    pub user_email: Option<String>,
    /// Absolute path to the conf file driving this clone, stamped as `gkit.conf` on
    /// each top-level repo so `gkit stamp` (run inside the repo, no arg) can later
    /// resolve its own conf. `None` (e.g. tests) skips the stamp.
    pub conf_path: Option<String>,
}
52
53impl Default for Opts {
54    fn default() -> Self {
55        Self {
56            submodule_branch: true,
57            direnv: true,
58            user_name: None,
59            user_email: None,
60            conf_path: None,
61        }
62    }
63}
64
// Shell body handed to `git submodule foreach --recursive`: look up the branch
// recorded for the submodule in the superproject's `.gitmodules` (falling back to
// `main`) and switch to it, ignoring any failure. `$toplevel` and `$name` are
// variables that `git submodule foreach` provides to the command it runs.
// Also reused by `fixsub` (re-applies this branch-switch over an existing tree).
pub(crate) const SUBMODULE_SWITCH: &str = r#"b=$(git config -f "$toplevel/.gitmodules" "submodule.$name.branch" 2>/dev/null || echo main); git switch "$b" 2>/dev/null || true"#;
67
/// Wrap `s` in single quotes so it can be spliced into an `sh -c` command line.
/// Every `'` inside the value is emitted as `'\''` (close quote, escaped quote,
/// reopen quote). Shared with `fixsub`.
pub(crate) fn sh_squote(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for ch in s.chars() {
        if ch == '\'' {
            quoted.push_str("'\\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
73
74/// The `git submodule foreach --recursive` body that stamps the resolved identity
75/// into each submodule, values single-quoted for `sh`. `None` when no identity was
76/// given (so the caller skips the recursion entirely).
77fn submodule_identity_cmd(user_name: Option<&str>, user_email: Option<&str>) -> Option<String> {
78    let parts: Vec<String> = [("user.name", user_name), ("user.email", user_email)]
79        .into_iter()
80        .filter_map(|(k, v)| v.map(|v| format!("git config {k} {}", sh_squote(v))))
81        .collect();
82    (!parts.is_empty()).then(|| parts.join("; "))
83}
84
/// Build the git-config `(key, value)` pair for a **namespace-scoped** `insteadOf`
/// rewrite, so a *canonical* submodule URL is routed through the alias:
///   key   = `url.<alias>:<ns>/.insteadOf`
///   value = `git@<hostname>:<ns>/`
/// With that rule git rewrites `git@<hostname>:<ns>/repo.git` to
/// `<alias>:<ns>/repo.git`. Both sides end in `/`, which limits the rule to that
/// namespace — several aliases on one host can each keep their own rule.
pub fn insteadof_pair(alias: &str, hostname: &str, ns: &str) -> (String, String) {
    let key = ["url.", alias, ":", ns, "/.insteadOf"].concat();
    let value = ["git@", hostname, ":", ns, "/"].concat();
    (key, value)
}
97
98/// Distinct namespaces across a conf's repos (each repo's effective namespace), in
99/// conf order, deduplicated — one `insteadOf` rule is written per distinct namespace.
100pub fn distinct_namespaces(conf: &CloneConf) -> Vec<String> {
101    let mut out: Vec<String> = Vec::new();
102    for r in &conf.repo {
103        if let Some(ns) = conf.namespace_for(r) {
104            if !out.iter().any(|n| n == ns) {
105                out.push(ns.to_string());
106            }
107        }
108    }
109    out
110}
111
/// Run hook commands via `sh -c` in `cwd` with `env` set; output inherited; each
/// printed `+ <cmd>`. Stops at the first non-zero exit. Shared with `stamp`, which
/// re-runs a conf's `post-clone` over an existing tree.
///
/// # Errors
///
/// Returns a message naming the offending hook when it
/// - cannot be started (`failed to start: <io error>`),
/// - exits with a non-zero code (`exited <code>`), or
/// - ends without an exit code, e.g. killed by a signal — the process status is
///   reported verbatim instead of a made-up `-1` code.
pub(crate) fn run_hooks(cmds: &[String], cwd: &Path, env: &[(&str, &str)]) -> Result<(), String> {
    for cmd in cmds {
        println!("+ {cmd}");
        let status = Command::new("sh")
            .arg("-c")
            .arg(cmd)
            .current_dir(cwd)
            .envs(env.iter().copied())
            .status()
            .map_err(|e| format!("hook `{cmd}` failed to start: {e}"))?;
        if status.success() {
            continue;
        }
        return Err(match status.code() {
            Some(code) => format!("hook `{cmd}` exited {code}"),
            // No exit code: the process did not exit normally (on Unix, a signal).
            None => format!("hook `{cmd}` terminated without an exit code ({status})"),
        });
    }
    Ok(())
}
131
132/// Build the `git …` argv (everything after the program name) for one repo's clone:
133/// `git <git-flags> clone [--depth N] [--branch B] [--single-branch] --recurse-submodules
134/// <clone-flags> <repo clone-flags> <url> <dir>`.
135///
136/// `--branch` and `--single-branch` are **independent**: a plain `branch = "B"` checks
137/// out `B` from a FULL clone (all branches fetched), while `single-branch = true` adds
138/// `--single-branch` — paired with `branch` it fetches only `B`; on its own (no
139/// `branch`) it clones only the remote's default branch, exactly as bare `git clone
140/// --single-branch` does.
141fn clone_args(conf: &CloneConf, r: &Repo, url: &str, dir_s: &str) -> Vec<String> {
142    let mut args: Vec<String> = Vec::new();
143    args.extend(conf.git_flags.iter().cloned());
144    args.push("clone".into());
145    if let Some(d) = r.depth {
146        args.push("--depth".into());
147        args.push(d.to_string());
148    }
149    if let Some(b) = &r.branch {
150        args.push("--branch".into());
151        args.push(b.clone());
152    }
153    if r.single_branch {
154        args.push("--single-branch".into());
155    }
156    args.push("--recurse-submodules".into());
157    args.extend(conf.clone_flags.iter().cloned());
158    args.extend(r.clone_flags.iter().cloned());
159    args.push(url.to_string());
160    args.push(dir_s.to_string());
161    args
162}
163
164/// Clone every repo in `conf`, printing each step in order. Returns a report per
165/// repo (for the aggregate exit code).
166pub fn clone_all<G: Git>(git: &G, conf: &CloneConf, opts: &Opts) -> Vec<CloneReport> {
167    conf.repo
168        .iter()
169        .map(|r| {
170            let name = r.name();
171            let dir_s = expand_path(&r.dir, |k| std::env::var(k).ok());
172            let dir = PathBuf::from(&dir_s);
173            // Per-repo namespace overrides the global one; `clone_cmd` validates this
174            // up front, so `None` here is a defensive backstop, not a normal path.
175            let ns = match conf.namespace_for(r) {
176                Some(n) => n.to_string(),
177                None => {
178                    let e = format!("no namespace for {}", r.dir);
179                    println!("FAILED   {name:<28} {e}");
180                    return CloneReport {
181                        name,
182                        dir,
183                        outcome: Outcome::Failed(e),
184                        command: String::new(),
185                    };
186                }
187            };
188            let url = format!("{}:{}/{}.git", conf.host, ns, name);
189
190            let args = clone_args(conf, r, &url, &dir_s);
191            let command = format!("git {}", args.join(" "));
192
193            let mk = |outcome| CloneReport {
194                name: name.clone(),
195                dir: dir.clone(),
196                outcome,
197                command: command.clone(),
198            };
199
200            if dir.join(".git").exists() {
201                println!("+ {command}");
202                println!("skipped  {name:<28} {dir_s} (exists)");
203                return mk(Outcome::Skipped);
204            }
205
206            let env = [
207                ("GKIT_REPO", name.as_str()),
208                ("GKIT_DIR", dir_s.as_str()),
209                ("GKIT_URL", url.as_str()),
210                ("GKIT_HOST", conf.host.as_str()),
211                ("GKIT_NAMESPACE", ns.as_str()),
212                ("GKIT_USER_NAME", opts.user_name.as_deref().unwrap_or("")),
213                ("GKIT_USER_EMAIL", opts.user_email.as_deref().unwrap_or("")),
214            ];
215
216            // 1+2: pre-clone hooks (cwd = parent of target; create it first)
217            let parent = dir.parent().unwrap_or(Path::new("."));
218            let _ = std::fs::create_dir_all(parent);
219            let pre: Vec<String> = conf
220                .pre_clone
221                .0
222                .iter()
223                .chain(r.pre_clone.0.iter())
224                .cloned()
225                .collect();
226            if let Err(e) = run_hooks(&pre, parent, &env) {
227                println!("FAILED   {name:<28} {e}");
228                return mk(Outcome::Failed(e));
229            }
230
231            // 3: clone (printed; output captured)
232            println!("+ {command}");
233            let refs: Vec<&str> = args.iter().map(String::as_str).collect();
234            let out = git.run(Path::new("."), &refs);
235            if !out.success {
236                let e = out.stderr.trim().to_string();
237                println!("FAILED   {name:<28} {}", e.lines().next().unwrap_or(""));
238                return mk(Outcome::Failed(e));
239            }
240
241            // 4: built-ins. Identity first (printed; values are explicit user input)
242            // so post-clone hooks and direnv see it; a failure fails the repo.
243            let identity: Vec<(&str, &str)> = [
244                ("user.name", opts.user_name.as_deref()),
245                ("user.email", opts.user_email.as_deref()),
246            ]
247            .into_iter()
248            .filter_map(|(k, v)| Some((k, v?)))
249            .collect();
250            // 4a: the superproject (args passed straight to git — no shell).
251            for (key, val) in &identity {
252                println!("+ git config {key} {val}");
253                let out = git.run(&dir, &["config", key, val]);
254                if !out.success {
255                    let e = format!("git config {key} failed: {}", out.stderr.trim());
256                    println!("FAILED   {name:<28} {e}");
257                    return mk(Outcome::Failed(e));
258                }
259            }
260            // 4a': stamp gkit.conf (absolute conf path) on the superproject so
261            // `gkit stamp` (no arg, run inside this repo) can resolve its conf later.
262            if let Some(cp) = opts.conf_path.as_deref() {
263                println!("+ git config gkit.conf {cp}");
264                let out = git.run(&dir, &["config", "gkit.conf", cp]);
265                if !out.success {
266                    let e = format!("git config gkit.conf failed: {}", out.stderr.trim());
267                    println!("FAILED   {name:<28} {e}");
268                    return mk(Outcome::Failed(e));
269                }
270            }
271            // 4b: the same identity into every submodule (recursive) so commits there
272            // use it too — a submodule is its own repo with its own config. Runs via
273            // `sh -c`, so the values are single-quoted.
274            if let Some(body) =
275                submodule_identity_cmd(opts.user_name.as_deref(), opts.user_email.as_deref())
276            {
277                println!("+ git submodule foreach --recursive {body}");
278                let out = git.run(
279                    &dir,
280                    &["submodule", "foreach", "--recursive", body.as_str()],
281                );
282                if !out.success {
283                    let e = format!("submodule identity failed: {}", out.stderr.trim());
284                    println!("FAILED   {name:<28} {e}");
285                    return mk(Outcome::Failed(e));
286                }
287            }
288            // remaining built-ins (captured)
289            if opts.submodule_branch {
290                let _ = git.run(
291                    &dir,
292                    &["submodule", "foreach", "--recursive", SUBMODULE_SWITCH],
293                );
294            }
295            if opts.direnv && dir.join(".envrc").exists() {
296                let _ = Command::new("direnv").arg("allow").arg(&dir).output(); // trust-only, no eval
297            }
298
299            // 5+6: post-clone hooks (cwd = the cloned repo)
300            let post: Vec<String> = conf
301                .post_clone
302                .0
303                .iter()
304                .chain(r.post_clone.0.iter())
305                .cloned()
306                .collect();
307            if let Err(e) = run_hooks(&post, &dir, &env) {
308                println!("FAILED   {name:<28} {e}");
309                return mk(Outcome::Failed(e));
310            }
311
312            println!("cloned   {name:<28} {dir_s}");
313            mk(Outcome::Cloned)
314        })
315        .collect()
316}
317
#[cfg(test)]
mod tests {
    use super::{
        clone_args, distinct_namespaces, insteadof_pair, sh_squote, submodule_identity_cmd, Opts,
    };
    use crate::conf;

    /// Clone URL for the first repo of `c`, assembled as `<host>:<ns>/<name>.git`.
    fn first_repo_url(c: &conf::CloneConf) -> String {
        let repo = &c.repo[0];
        let ns = c.namespace_for(repo).unwrap();
        format!("{}:{}/{}.git", c.host, ns, repo.name())
    }

    #[test]
    fn submodule_identity_cmd_quotes_and_skips() {
        // (user name, user email, expected shell body)
        let cases: [(Option<&str>, Option<&str>, Option<&str>); 4] = [
            // both fields: two `git config`s, single-quoted, joined with `; `
            (
                Some("Jane Dev"),
                Some("jane@acme.com"),
                Some("git config user.name 'Jane Dev'; git config user.email 'jane@acme.com'"),
            ),
            // a single field: only that statement
            (Some("Jane"), None, Some("git config user.name 'Jane'")),
            // nothing given: None, so the caller skips the recursion
            (None, None, None),
            // an embedded single quote is escaped so `sh` can't break out
            (
                Some("O'Brien"),
                None,
                Some(r"git config user.name 'O'\''Brien'"),
            ),
        ];
        for (user_name, user_email, expected) in cases {
            assert_eq!(
                submodule_identity_cmd(user_name, user_email).as_deref(),
                expected
            );
        }
        assert_eq!(sh_squote("a b"), "'a b'");
    }

    #[test]
    fn insteadof_pair_is_namespace_scoped() {
        // bitbucket client
        let (key, value) = insteadof_pair("tlbb", "bitbucket.org", "codogenics");
        assert_eq!(key, "url.tlbb:codogenics/.insteadOf");
        assert_eq!(value, "git@bitbucket.org:codogenics/");

        // a gitlab subgroup namespace keeps its inner slash
        let (_, subgroup_value) = insteadof_pair("ctl", "gitlab.com", "grp/sub");
        assert_eq!(subgroup_value, "git@gitlab.com:grp/sub/");
    }

    #[test]
    fn distinct_namespaces_dedups_in_order() {
        let parsed = conf::parse(concat!(
            "host=\"h\"\nnamespace=\"glob\"\n",
            "[[repo]]\ndir=\"$H/a\"\n",
            "[[repo]]\ndir=\"$H/b\"\nnamespace=\"bob\"\n",
            "[[repo]]\ndir=\"$H/c\"\n",
        ))
        .unwrap();
        // a → glob, b → bob (override), c → glob again: deduped, first-seen order
        let namespaces = distinct_namespaces(&parsed);
        assert_eq!(namespaces, ["glob", "bob"]);
    }

    #[test]
    fn opts_default_has_no_conf_path() {
        // gkit.conf is opt-in: the default (used by tests / non-clone callers)
        // leaves it unstamped.
        let defaults = Opts::default();
        assert_eq!(defaults.conf_path, None);
    }

    #[test]
    fn builds_expected_url_shape() {
        let parsed = conf::parse(concat!(
            "host = \"tlbb\"\n",
            "namespace = \"example-org\"\n",
            "[[repo]]\n",
            "dir = \"$HOME/x/cosp\"\n",
            "depth = 1\n",
        ))
        .unwrap();
        let repo = &parsed.repo[0];
        assert_eq!(repo.name(), "cosp");
        assert_eq!(repo.depth, Some(1));
        assert_eq!(first_repo_url(&parsed), "tlbb:example-org/cosp.git");
    }

    #[test]
    fn branch_is_full_clone_by_default() {
        // `branch` alone: that branch is checked out, with no --single-branch
        let parsed = conf::parse(concat!(
            "host=\"tlbb\"\nnamespace=\"codogenics\"\n",
            "[[repo]]\ndir=\"$HOME/scratch-spark\"\nname=\"spark4beginners\"\n",
            "branch=\"SCB-543-spark-scala-chapter2\"\n",
        ))
        .unwrap();
        let url = "tlbb:codogenics/spark4beginners.git";
        let argv = clone_args(&parsed, &parsed.repo[0], url, "/h/s");
        let expected = [
            "clone",
            "--branch",
            "SCB-543-spark-scala-chapter2",
            "--recurse-submodules",
            "tlbb:codogenics/spark4beginners.git",
            "/h/s",
        ];
        assert_eq!(argv, expected);
        assert!(argv.iter().all(|a| a != "--single-branch"));
    }

    #[test]
    fn single_branch_true_adds_flag() {
        // branch + single-branch=true: --branch B --single-branch (the old behavior)
        let parsed = conf::parse(concat!(
            "host=\"h\"\nnamespace=\"o\"\n",
            "[[repo]]\ndir=\"$H/r\"\nbranch=\"dev\"\nsingle-branch=true\n",
        ))
        .unwrap();
        let argv = clone_args(&parsed, &parsed.repo[0], "h:o/r.git", "/h/r");
        let expected = [
            "clone",
            "--branch",
            "dev",
            "--single-branch",
            "--recurse-submodules",
            "h:o/r.git",
            "/h/r",
        ];
        assert_eq!(argv, expected);
    }

    #[test]
    fn single_branch_without_branch_clones_default_only() {
        // single-branch=true alone: bare --single-branch (remote's default branch only)
        let parsed = conf::parse(concat!(
            "host=\"h\"\nnamespace=\"o\"\n",
            "[[repo]]\ndir=\"$H/r\"\nsingle-branch=true\n",
        ))
        .unwrap();
        let argv = clone_args(&parsed, &parsed.repo[0], "h:o/r.git", "/h/r");
        let expected = [
            "clone",
            "--single-branch",
            "--recurse-submodules",
            "h:o/r.git",
            "/h/r",
        ];
        assert_eq!(argv, expected);
        assert!(argv.iter().all(|a| a != "--branch"));
    }

    #[test]
    fn per_repo_namespace_drives_url() {
        let parsed = conf::parse(concat!(
            "host=\"gh\"\n",
            "[[repo]]\ndir=\"$HOME/x/foo\"\nnamespace=\"alice\"\n",
        ))
        .unwrap();
        assert_eq!(first_repo_url(&parsed), "gh:alice/foo.git");
    }
}