Skip to main content

gkit_core/
clone.rs

1//! Config-driven clone with explicit flag placement, built-in stateless steps, and
2//! pre/post-clone hooks.
3//!
4//! Per repo, in order: global `pre-clone` → repo `pre-clone` → `git <PRE> clone
5//! <POST> <url> <dir>` → built-ins (git identity + submodule branch-switch +
6//! `direnv allow`) → global `post-clone` → repo `post-clone`.
7//!
8//! Git identity (`user.name`/`user.email`) is **per-invocation, not in the conf**
9//! (the conf is shared across a team): it comes from `Opts` (the `clone`
//! `--user-name`/`--user-email` flags, or an interactive prompt), and is stamped
//! via `git config` on each cloned repo right after clone so `post-clone` hooks see it.
12//!
13//! The `git clone` and built-ins are **captured** (clean status; an `.envrc` that
14//! runs `glow …` can't distort output — `direnv allow` only records trust). User
15//! hooks run via `sh -c` with their output **inherited** (explicit commands, shown
16//! live) and `$GKIT_REPO`/`GKIT_DIR`/`GKIT_URL`/`GKIT_HOST`/`GKIT_NAMESPACE` set
17//! (plus `GKIT_USER_NAME`/`GKIT_USER_EMAIL`, empty when no identity was given).
18
19use crate::conf::{expand_path, CloneConf, Repo};
20use crate::git::Git;
21use std::path::{Path, PathBuf};
22use std::process::Command;
23
/// Final state of one repo after a clone pass.
#[derive(Debug, PartialEq, Eq)]
pub enum Outcome {
    /// The repo was cloned and every later step (built-ins, hooks) succeeded.
    Cloned,
    /// The target directory already contained a `.git`, so nothing was run.
    Skipped,
    /// A step failed; the payload is the error text for that step.
    Failed(String),
}
30
31#[derive(Debug)]
32pub struct CloneReport {
33    pub name: String,
34    pub dir: PathBuf,
35    pub outcome: Outcome,
36    pub command: String,
37}
38
/// Per-invocation switches for `clone_all` (none of these come from the conf).
///
/// Derives `Debug` and `Clone` so callers can log the options and pass copies
/// around, matching the other public types in this module.
#[derive(Debug, Clone)]
pub struct Opts {
    /// Run the submodule branch-switch built-in after the clone.
    pub submodule_branch: bool,
    /// Run `direnv allow` on the cloned repo when it contains an `.envrc`.
    pub direnv: bool,
    /// Git identity stamped on each cloned repo (`git config user.name`). Per
    /// invocation, not from the conf — `None` leaves the repo's inherited identity.
    pub user_name: Option<String>,
    /// Git identity stamped on each cloned repo (`git config user.email`).
    pub user_email: Option<String>,
    /// Absolute path to the conf file driving this clone, stamped as `gkit.conf` on
    /// each top-level repo so `gkit stamp` (run inside the repo, no arg) can later
    /// resolve its own conf. `None` (e.g. tests) skips the stamp.
    pub conf_path: Option<String>,
}

impl Default for Opts {
    /// Both built-ins enabled; no identity and no conf path to stamp.
    fn default() -> Self {
        Self {
            submodule_branch: true,
            direnv: true,
            user_name: None,
            user_email: None,
            conf_path: None,
        }
    }
}
64
// Shell body handed to `git submodule foreach --recursive`: look up the submodule's
// configured branch in the superproject's `.gitmodules` (falling back to `main`) and
// switch to it, ignoring a failed switch. Also reused by `fixsub` (re-applies this
// branch-switch over an existing tree).
pub(crate) const SUBMODULE_SWITCH: &str = r#"b=$(git config -f "$toplevel/.gitmodules" "submodule.$name.branch" 2>/dev/null || echo main); git switch "$b" 2>/dev/null || true"#;
67
/// Wrap `s` in single quotes so it can be spliced into an `sh -c` command line as
/// one literal word. Every `'` inside `s` is emitted as `'\''` (close the quote,
/// escaped quote, reopen). Shared with `fixsub`.
pub(crate) fn sh_squote(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for ch in s.chars() {
        if ch == '\'' {
            quoted.push_str("'\\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
73
74/// The `git submodule foreach --recursive` body that stamps the resolved identity
75/// into each submodule, values single-quoted for `sh`. `None` when no identity was
76/// given (so the caller skips the recursion entirely).
77fn submodule_identity_cmd(user_name: Option<&str>, user_email: Option<&str>) -> Option<String> {
78    let parts: Vec<String> = [("user.name", user_name), ("user.email", user_email)]
79        .into_iter()
80        .filter_map(|(k, v)| v.map(|v| format!("git config {k} {}", sh_squote(v))))
81        .collect();
82    (!parts.is_empty()).then(|| parts.join("; "))
83}
84
/// Compute the git-config `(key, value)` pair for a **namespace-scoped** `insteadOf`
/// rewrite, so a *canonical* submodule URL is routed through the alias:
///   key   = `url.<alias>:<ns>/.insteadOf`
///   value = `git@<hostname>:<ns>/`
/// With that rule git rewrites `git@<hostname>:<ns>/repo.git` to
/// `<alias>:<ns>/repo.git`. Both sides end in `/`, which limits the rule to that
/// namespace (several aliases on one host can each keep their own rule).
pub fn insteadof_pair(alias: &str, hostname: &str, ns: &str) -> (String, String) {
    let key = ["url.", alias, ":", ns, "/.insteadOf"].concat();
    let value = ["git@", hostname, ":", ns, "/"].concat();
    (key, value)
}
97
98/// Distinct namespaces across a conf's repos (each repo's effective namespace), in
99/// conf order, deduplicated — one `insteadOf` rule is written per distinct namespace.
100pub fn distinct_namespaces(conf: &CloneConf) -> Vec<String> {
101    let mut out: Vec<String> = Vec::new();
102    for r in &conf.repo {
103        if let Some(ns) = conf.namespace_for(r) {
104            if !out.iter().any(|n| n == ns) {
105                out.push(ns.to_string());
106            }
107        }
108    }
109    out
110}
111
/// Execute `cmds` in order, stopping at the first failure. Every command is echoed
/// as `+ <cmd>` before it runs, gets the extra `env` variables, and inherits this
/// process's stdio. Shared with `stamp`, which re-runs a conf's `post-clone` over an
/// existing tree.
///
/// **Every command is a separate `sh -e -c '<cmd>'` process started in `cwd`.** What
/// that means for hook authors:
/// - **`-e`** makes a multi-step line stop at its first failing step: in
///   `cd sub; git config …` a failed `cd sub` prevents the `git config` from running,
///   so defensive `&&` chains or `|| true` are not needed for safety.
/// - **one process per line** means a `cd` never carries over to the next line; put a
///   `cd` and the command that depends on it on the *same* line
///   (`cd sub && git config …`).
///
/// # Errors
/// Returns `Err` with ``hook `<cmd>` exited <code>`` for the first command that exits
/// unsuccessfully (`-1` when no exit code is available), or
/// ``hook `<cmd>` failed to start: <io error>`` when `sh` could not be spawned. Later
/// commands are not run. A command that may legitimately fail can opt out with an
/// explicit `cmd || true`.
pub(crate) fn run_hooks(cmds: &[String], cwd: &Path, env: &[(&str, &str)]) -> Result<(), String> {
    cmds.iter().try_for_each(|hook| {
        println!("+ {hook}");
        let status = Command::new("sh")
            // `-e`: abort the line at its first failing step.
            .args(["-e", "-c", hook.as_str()])
            .current_dir(cwd)
            .envs(env.iter().copied())
            .status()
            .map_err(|err| format!("hook `{hook}` failed to start: {err}"))?;
        if status.success() {
            Ok(())
        } else {
            Err(format!(
                "hook `{hook}` exited {}",
                status.code().unwrap_or(-1)
            ))
        }
    })
}
145
146/// Build the `git …` argv (everything after the program name) for one repo's clone:
147/// `git <git-flags> clone [--depth N] [--branch B] [--single-branch] --recurse-submodules
148/// <clone-flags> <repo clone-flags> <url> <dir>`.
149///
150/// `--branch` and `--single-branch` are **independent**: a plain `branch = "B"` checks
151/// out `B` from a FULL clone (all branches fetched), while `single-branch = true` adds
152/// `--single-branch` — paired with `branch` it fetches only `B`; on its own (no
153/// `branch`) it clones only the remote's default branch, exactly as bare `git clone
154/// --single-branch` does.
155fn clone_args(conf: &CloneConf, r: &Repo, url: &str, dir_s: &str) -> Vec<String> {
156    let mut args: Vec<String> = Vec::new();
157    args.extend(conf.git_flags.iter().cloned());
158    args.push("clone".into());
159    if let Some(d) = r.depth {
160        args.push("--depth".into());
161        args.push(d.to_string());
162    }
163    if let Some(b) = &r.branch {
164        args.push("--branch".into());
165        args.push(b.clone());
166    }
167    if r.single_branch {
168        args.push("--single-branch".into());
169    }
170    args.push("--recurse-submodules".into());
171    args.extend(conf.clone_flags.iter().cloned());
172    args.extend(r.clone_flags.iter().cloned());
173    args.push(url.to_string());
174    args.push(dir_s.to_string());
175    args
176}
177
178/// Clone every repo in `conf`, printing each step in order. Returns a report per
179/// repo (for the aggregate exit code).
180pub fn clone_all<G: Git>(git: &G, conf: &CloneConf, opts: &Opts) -> Vec<CloneReport> {
181    conf.repo
182        .iter()
183        .map(|r| {
184            let name = r.name();
185            let dir_s = expand_path(&r.dir, |k| std::env::var(k).ok());
186            let dir = PathBuf::from(&dir_s);
187            // Per-repo namespace overrides the global one; `clone_cmd` validates this
188            // up front, so `None` here is a defensive backstop, not a normal path.
189            let ns = match conf.namespace_for(r) {
190                Some(n) => n.to_string(),
191                None => {
192                    let e = format!("no namespace for {}", r.dir);
193                    println!("FAILED   {name:<28} {e}");
194                    return CloneReport {
195                        name,
196                        dir,
197                        outcome: Outcome::Failed(e),
198                        command: String::new(),
199                    };
200                }
201            };
202            let url = format!("{}:{}/{}.git", conf.host, ns, name);
203
204            let args = clone_args(conf, r, &url, &dir_s);
205            let command = format!("git {}", args.join(" "));
206
207            let mk = |outcome| CloneReport {
208                name: name.clone(),
209                dir: dir.clone(),
210                outcome,
211                command: command.clone(),
212            };
213
214            if dir.join(".git").exists() {
215                println!("+ {command}");
216                println!("skipped  {name:<28} {dir_s} (exists)");
217                return mk(Outcome::Skipped);
218            }
219
220            let env = [
221                ("GKIT_REPO", name.as_str()),
222                ("GKIT_DIR", dir_s.as_str()),
223                ("GKIT_URL", url.as_str()),
224                ("GKIT_HOST", conf.host.as_str()),
225                ("GKIT_NAMESPACE", ns.as_str()),
226                ("GKIT_USER_NAME", opts.user_name.as_deref().unwrap_or("")),
227                ("GKIT_USER_EMAIL", opts.user_email.as_deref().unwrap_or("")),
228            ];
229
230            // 1+2: pre-clone hooks (cwd = parent of target; create it first)
231            let parent = dir.parent().unwrap_or(Path::new("."));
232            let _ = std::fs::create_dir_all(parent);
233            let pre: Vec<String> = conf
234                .pre_clone
235                .0
236                .iter()
237                .chain(r.pre_clone.0.iter())
238                .cloned()
239                .collect();
240            if let Err(e) = run_hooks(&pre, parent, &env) {
241                println!("FAILED   {name:<28} {e}");
242                return mk(Outcome::Failed(e));
243            }
244
245            // 3: clone (printed; output captured)
246            println!("+ {command}");
247            let refs: Vec<&str> = args.iter().map(String::as_str).collect();
248            let out = git.run(Path::new("."), &refs);
249            if !out.success {
250                let e = out.stderr.trim().to_string();
251                println!("FAILED   {name:<28} {}", e.lines().next().unwrap_or(""));
252                return mk(Outcome::Failed(e));
253            }
254
255            // 4: built-ins. Identity first (printed; values are explicit user input)
256            // so post-clone hooks and direnv see it; a failure fails the repo.
257            let identity: Vec<(&str, &str)> = [
258                ("user.name", opts.user_name.as_deref()),
259                ("user.email", opts.user_email.as_deref()),
260            ]
261            .into_iter()
262            .filter_map(|(k, v)| Some((k, v?)))
263            .collect();
264            // 4a: the superproject (args passed straight to git — no shell).
265            for (key, val) in &identity {
266                println!("+ git config {key} {val}");
267                let out = git.run(&dir, &["config", key, val]);
268                if !out.success {
269                    let e = format!("git config {key} failed: {}", out.stderr.trim());
270                    println!("FAILED   {name:<28} {e}");
271                    return mk(Outcome::Failed(e));
272                }
273            }
274            // 4a': stamp gkit.conf (absolute conf path) on the superproject so
275            // `gkit stamp` (no arg, run inside this repo) can resolve its conf later.
276            if let Some(cp) = opts.conf_path.as_deref() {
277                println!("+ git config gkit.conf {cp}");
278                let out = git.run(&dir, &["config", "gkit.conf", cp]);
279                if !out.success {
280                    let e = format!("git config gkit.conf failed: {}", out.stderr.trim());
281                    println!("FAILED   {name:<28} {e}");
282                    return mk(Outcome::Failed(e));
283                }
284            }
285            // 4b: the same identity into every submodule (recursive) so commits there
286            // use it too — a submodule is its own repo with its own config. Runs via
287            // `sh -c`, so the values are single-quoted.
288            if let Some(body) =
289                submodule_identity_cmd(opts.user_name.as_deref(), opts.user_email.as_deref())
290            {
291                println!("+ git submodule foreach --recursive {body}");
292                let out = git.run(
293                    &dir,
294                    &["submodule", "foreach", "--recursive", body.as_str()],
295                );
296                if !out.success {
297                    let e = format!("submodule identity failed: {}", out.stderr.trim());
298                    println!("FAILED   {name:<28} {e}");
299                    return mk(Outcome::Failed(e));
300                }
301            }
302            // remaining built-ins (captured)
303            if opts.submodule_branch {
304                let _ = git.run(
305                    &dir,
306                    &["submodule", "foreach", "--recursive", SUBMODULE_SWITCH],
307                );
308            }
309            if opts.direnv && dir.join(".envrc").exists() {
310                let _ = Command::new("direnv").arg("allow").arg(&dir).output(); // trust-only, no eval
311            }
312
313            // 5+6: post-clone hooks (cwd = the cloned repo)
314            let post: Vec<String> = conf
315                .post_clone
316                .0
317                .iter()
318                .chain(r.post_clone.0.iter())
319                .cloned()
320                .collect();
321            if let Err(e) = run_hooks(&post, &dir, &env) {
322                println!("FAILED   {name:<28} {e}");
323                return mk(Outcome::Failed(e));
324            }
325
326            println!("cloned   {name:<28} {dir_s}");
327            mk(Outcome::Cloned)
328        })
329        .collect()
330}
331
#[cfg(test)]
mod tests {
    use super::{
        clone_args, distinct_namespaces, insteadof_pair, sh_squote, submodule_identity_cmd, Opts,
    };
    use crate::conf;

    /// Identity command: quoting, one-sided input, and the "nothing to do" case.
    #[test]
    fn submodule_identity_cmd_quotes_and_skips() {
        // name + email → two single-quoted `git config` steps joined with `; `
        let both = submodule_identity_cmd(Some("Jane Dev"), Some("jane@acme.com"));
        assert_eq!(
            both.as_deref(),
            Some("git config user.name 'Jane Dev'; git config user.email 'jane@acme.com'")
        );

        // a single value produces a single step
        let name_only = submodule_identity_cmd(Some("Jane"), None);
        assert_eq!(name_only.as_deref(), Some("git config user.name 'Jane'"));

        // no values → None, so the caller skips the recursion
        assert_eq!(submodule_identity_cmd(None, None), None);

        // a `'` inside the value is escaped and cannot terminate the quoting
        let tricky = submodule_identity_cmd(Some("O'Brien"), None);
        assert_eq!(
            tricky.as_deref(),
            Some(r"git config user.name 'O'\''Brien'")
        );

        assert_eq!(sh_squote("a b"), "'a b'");
    }

    /// The rewrite rule is scoped to `<ns>/` on both key and value.
    #[test]
    fn insteadof_pair_is_namespace_scoped() {
        // bitbucket client
        let (key, value) = insteadof_pair("tlbb", "bitbucket.org", "codogenics");
        assert_eq!(
            (key, value),
            (
                "url.tlbb:codogenics/.insteadOf".to_string(),
                "git@bitbucket.org:codogenics/".to_string()
            )
        );

        // a gitlab subgroup namespace keeps its inner slash
        let (_, subgroup_value) = insteadof_pair("ctl", "gitlab.com", "grp/sub");
        assert_eq!(subgroup_value, "git@gitlab.com:grp/sub/");
    }

    /// Repeated namespaces collapse to their first occurrence, conf order kept.
    #[test]
    fn distinct_namespaces_dedups_in_order() {
        let parsed = conf::parse(concat!(
            "host=\"h\"\n",
            "namespace=\"glob\"\n",
            "[[repo]]\n",
            "dir=\"$H/a\"\n",
            "[[repo]]\n",
            "dir=\"$H/b\"\n",
            "namespace=\"bob\"\n",
            "[[repo]]\n",
            "dir=\"$H/c\"\n",
        ))
        .unwrap();
        // a → glob, b → bob (override), c → glob again ⇒ [glob, bob]
        assert_eq!(distinct_namespaces(&parsed), vec!["glob", "bob"]);
    }

    /// `gkit.conf` stamping is opt-in: the default options carry no conf path.
    #[test]
    fn opts_default_has_no_conf_path() {
        let defaults = Opts::default();
        assert_eq!(defaults.conf_path, None);
    }

    /// The clone URL has the shape `<host>:<namespace>/<name>.git`.
    #[test]
    fn builds_expected_url_shape() {
        let parsed = conf::parse(concat!(
            "host = \"tlbb\"\n",
            "namespace = \"example-org\"\n",
            "[[repo]]\n",
            "dir = \"$HOME/x/cosp\"\n",
            "depth = 1\n",
        ))
        .unwrap();
        let repo = &parsed.repo[0];
        assert_eq!(repo.name(), "cosp");
        assert_eq!(repo.depth, Some(1));
        let ns = parsed.namespace_for(repo).unwrap();
        let url = format!("{}:{}/{}.git", parsed.host, ns, repo.name());
        assert_eq!(url, "tlbb:example-org/cosp.git");
    }

    /// `branch` alone means `--branch B` on a full clone (no `--single-branch`).
    #[test]
    fn branch_is_full_clone_by_default() {
        let parsed = conf::parse(concat!(
            "host=\"tlbb\"\n",
            "namespace=\"codogenics\"\n",
            "[[repo]]\n",
            "dir=\"$HOME/scratch-spark\"\n",
            "name=\"spark4beginners\"\n",
            "branch=\"SCB-543-spark-scala-chapter2\"\n",
        ))
        .unwrap();
        let argv = clone_args(
            &parsed,
            &parsed.repo[0],
            "tlbb:codogenics/spark4beginners.git",
            "/h/s",
        );
        assert_eq!(
            argv,
            [
                "clone",
                "--branch",
                "SCB-543-spark-scala-chapter2",
                "--recurse-submodules",
                "tlbb:codogenics/spark4beginners.git",
                "/h/s",
            ]
        );
        assert!(!argv.iter().any(|arg| arg == "--single-branch"));
    }

    /// `branch` + `single-branch = true` → `--branch B --single-branch`.
    #[test]
    fn single_branch_true_adds_flag() {
        let parsed = conf::parse(concat!(
            "host=\"h\"\n",
            "namespace=\"o\"\n",
            "[[repo]]\n",
            "dir=\"$H/r\"\n",
            "branch=\"dev\"\n",
            "single-branch=true\n",
        ))
        .unwrap();
        let argv = clone_args(&parsed, &parsed.repo[0], "h:o/r.git", "/h/r");
        assert_eq!(
            argv,
            [
                "clone",
                "--branch",
                "dev",
                "--single-branch",
                "--recurse-submodules",
                "h:o/r.git",
                "/h/r"
            ]
        );
    }

    /// `single-branch = true` without `branch` → bare `--single-branch`.
    #[test]
    fn single_branch_without_branch_clones_default_only() {
        let parsed = conf::parse(concat!(
            "host=\"h\"\n",
            "namespace=\"o\"\n",
            "[[repo]]\n",
            "dir=\"$H/r\"\n",
            "single-branch=true\n",
        ))
        .unwrap();
        let argv = clone_args(&parsed, &parsed.repo[0], "h:o/r.git", "/h/r");
        assert_eq!(
            argv,
            [
                "clone",
                "--single-branch",
                "--recurse-submodules",
                "h:o/r.git",
                "/h/r"
            ]
        );
        assert!(!argv.iter().any(|arg| arg == "--branch"));
    }

    /// A repo-level namespace is used for the URL when no global one is set.
    #[test]
    fn per_repo_namespace_drives_url() {
        let parsed = conf::parse(concat!(
            "host=\"gh\"\n",
            "[[repo]]\n",
            "dir=\"$HOME/x/foo\"\n",
            "namespace=\"alice\"\n",
        ))
        .unwrap();
        let repo = &parsed.repo[0];
        let ns = parsed.namespace_for(repo).unwrap();
        let url = format!("{}:{}/{}.git", parsed.host, ns, repo.name());
        assert_eq!(url, "gh:alice/foo.git");
    }
}