sley_remote/clone.rs
//! Callable clone orchestration for HTTP(S), SSH, `git://`, and local (`file://`/path) remotes.
2//!
3//! [`clone`] performs the transport-shaped core of `git clone` for the common
4//! branch-tracking case: it initializes the destination repository, fetches from
5//! the resolved remote (reusing the Stage E [`crate::fetch`] machinery), creates
6//! the local branch at the fetched remote tip, points `refs/remotes/<origin>/HEAD`
7//! at the remote default branch, and checks out the worktree (via
8//! [`sley_worktree`]). Everything is taken as explicit parameters — the
9//! destination, the [`ObjectFormat`], the resolved [`CloneSource`], a
10//! [`CloneOptions`], two caller callbacks, and the seam objects
11//! ([`CredentialProvider`], [`ProgressSink`]) — so it never reads process-global
12//! state, mutates the process CWD, parses arguments, or prints.
13//!
14//! Crucially, [`clone`] takes the destination `git_dir` implicitly (from the
15//! init it performs) and drives the fetch against it directly, so there is no
16//! `set_current_dir` dance: the CLI's old clone path chdir'd into the new repo so
17//! its `discover_git_dir`/`ls_remote_resolved_url` helpers would resolve the
18//! freshly-created repository, then restored the CWD. Here the repository and
19//! remote are already resolved by the caller and passed in, so the process CWD is
20//! never touched.
21//!
22//! The CLI keeps everything that is policy or presentation: argument parsing, the
23//! "Cloning into…"/"done." lines and `--depth`/`--filter` warnings, the
24//! unsupported-option gating (bare/mirror, `--revision`, `--shared`/`--reference`,
25//! `--bundle-uri`, SHA-256 over HTTP), and the post-checkout steps
26//! (`--no-checkout` worktree removal, `--sparse`, `--separate-git-dir`). The two
27//! `configure` callbacks let the CLI run its own config-writing helpers (template
28//! application, `remote.<origin>.*`, `-c` overrides, `submodule.active`, branch
29//! upstream) at the right points in the flow while returning the [`GitConfig`]
30//! the next step needs, keeping that CLI-coupled config I/O out of the library.
31//!
32//! SSH clone uses the same [`crate::fetch`] SSH dispatch as fetch; only the
33//! caller-side URL resolution and post-clone presentation stay in the CLI.
34
35use std::path::{Path, PathBuf};
36
37use sley_config::GitConfig;
38use sley_core::{GitError, ObjectFormat, ObjectId, Result};
39use sley_formats::{InitOptions, RefStorageFormat, RepositoryBootstrap};
40use sley_object::{Commit, ObjectType, Tree};
41use sley_odb::{FileObjectDatabase, ObjectReader};
42use sley_refs::{FileRefStore, RefTarget, RefUpdate};
43use sley_transport::RemoteUrl;
44
45use crate::fetch::{FetchOptions, FetchSource, fetch};
46use crate::{CredentialProvider, ProgressSink};
47
/// Placeholder initial-branch name handed to the repository init step.
///
/// Clone has to create the destination repository before it knows whether
/// `HEAD` will end up on a branch, on a detached commit, or on an unborn
/// remote branch, so init is given this throwaway name first.
const CLONE_UNBORN_BRANCH: &str = "__sley_clone_unborn__";
51
52/// How [`clone`] reaches the remote it is cloning from.
53///
54/// The caller resolves the remote (URL rewriting, repository discovery — all
55/// process-state dependent) and hands `clone` a concrete transport.
56pub enum CloneSource {
57 /// A smart-HTTP(S) remote at the given already-resolved URL.
58 Http(RemoteUrl),
59 /// An SSH remote at the given already-resolved URL. Fetched by spawning `ssh`
60 /// (the credential seam is unused — the `ssh` program owns authentication).
61 Ssh(RemoteUrl),
62 /// A native anonymous `git://` remote at the given already-resolved URL.
63 Git {
64 remote: RemoteUrl,
65 protocol_v2: bool,
66 },
67 /// A local repository served in-process from `git_dir`.
68 Local {
69 /// The remote repository's `$GIT_DIR`.
70 git_dir: PathBuf,
71 /// The remote repository's common `$GIT_DIR` (object format source).
72 common_git_dir: PathBuf,
73 },
74}
75
76/// The clone inputs the library needs for the branch-tracking flow, all resolved
77/// by the caller. The remaining `git clone` knobs (bare/mirror, `--revision`,
78/// templates, config overrides, sparse, separate-git-dir, etc.) stay in the CLI:
79/// the unsupported ones are gated before `clone` is called, and the config-writing
80/// ones run inside the `configure`/`configure_branch` callbacks.
81pub struct CloneOptions<'a> {
82 /// The remote name to configure and track (`--origin`, default `origin`).
83 pub origin: &'a str,
84 /// The branch to create locally and check out (the requested `--branch` or
85 /// the remote's default branch).
86 pub checkout_branch: &'a str,
87 /// The remote's default branch, used to decide whether to point
88 /// `refs/remotes/<origin>/HEAD` at it.
89 pub remote_head_branch: &'a str,
90 /// Whether only `checkout_branch` was fetched (`--single-branch`); when set,
91 /// `refs/remotes/<origin>/HEAD` is only written if the checked-out branch is
92 /// the remote default.
93 pub single_branch: bool,
94 /// Shallow clone depth (`--depth N`): truncate history to `N` commits per tip,
95 /// writing `$GIT_DIR/shallow`. `None` is a full clone. Honored by the HTTP
96 /// and SSH transports and by the in-process local server (`git clone
97 /// --no-local --depth N <path>`); a depth on a plain local clone is
98 /// warned-and-ignored upstream of `clone` by the caller, matching git's
99 /// `is_local` behavior.
100 pub depth: Option<u32>,
101 /// `--shallow-since=<date>` (parsed to an epoch): deepen to commits newer
102 /// than the date. Local in-process transport only.
103 pub deepen_since: Option<i64>,
104 /// `--shallow-exclude=<ref>` values, resolved against the remote.
105 pub deepen_not: Vec<String>,
106 /// The committer identity for the branch-creation and checkout reflog entries.
107 pub committer: Vec<u8>,
108 /// The remote `HEAD` is detached at this commit (no default branch). After
109 /// the fetch the destination checks out this commit detached instead of
110 /// creating `checkout_branch`; `refs/remotes/<origin>/HEAD` is not written.
111 pub detached_head: Option<ObjectId>,
112 /// Whether clone should populate the worktree. `--no-checkout` still writes
113 /// refs/config but must not hydrate filtered blobs solely for checkout.
114 pub checkout: bool,
115 /// Partial-clone object filter (`--filter=blob:none`) to apply to the
116 /// clone fetch. Only honored by the in-process local server.
117 pub filter: Option<sley_odb::PackObjectFilter>,
118 /// Whether `checkout_branch` came from an explicit `--branch`. When set, a
119 /// missing remote tip for that branch is a hard error ("Remote branch … not
120 /// found"); when unset, a missing tip is an empty/unborn-repository clone.
121 pub branch_explicit: bool,
122 /// Destination repository ref storage format.
123 pub ref_storage: RefStorageFormat,
124 /// SSH command-line shape for the clone's internal fetch, used for
125 /// clone-only flags like `-4`/`-6`.
126 pub ssh_options: Option<crate::ssh::SshTransportOptions>,
127}
128
129/// The structured result of a [`clone`].
130#[derive(Debug, Clone)]
131pub struct CloneOutcome {
132 /// The destination repository's `$GIT_DIR` (the `.git` directory created by
133 /// the init step). The caller uses it for its post-checkout steps.
134 pub git_dir: PathBuf,
135 /// The object id the local branch was created at (the fetched remote tip),
136 /// or `None` when the remote was empty/unborn (no branch was created and
137 /// `HEAD` was left as an unborn symref to `checkout_branch`).
138 pub branch_oid: Option<ObjectId>,
139 /// True when the remote advertised no refs for `checkout_branch` and no
140 /// `--branch`/`--revision` was requested: an empty/unborn-repository clone.
141 /// The caller prints git's "You appear to have cloned an empty repository."
142 /// warning and skips the worktree checkout.
143 pub empty: bool,
144}
145
146/// Fully resolved inputs for a [`clone`] run.
147pub struct CloneRequest<'a> {
148 /// Destination worktree/repository path.
149 pub destination: &'a Path,
150 /// Explicit destination git directory, used by `GIT_WORK_TREE git clone`
151 /// where the command-line directory is the repository admin dir and the
152 /// worktree lives elsewhere.
153 pub git_dir_override: Option<&'a Path>,
154 /// Value to write as `core.worktree` when `git_dir_override` separates the
155 /// admin dir from the checkout root.
156 pub core_worktree: Option<&'a str>,
157 /// Destination repository object format.
158 pub format: ObjectFormat,
159 /// Already-resolved clone source.
160 pub source: &'a CloneSource,
161 /// Clone behavior and branch-tracking options.
162 pub options: &'a CloneOptions<'a>,
163}
164
165/// Mutable seams used while cloning.
166pub struct CloneServices<'a> {
167 /// Callback that writes initial repository config and returns the resulting
168 /// config snapshot used for the fetch.
169 pub configure: &'a mut dyn FnMut(&Path) -> Result<GitConfig>,
170 /// Callback that writes local branch upstream config and returns the config
171 /// snapshot used for checkout filtering.
172 pub configure_branch: &'a mut dyn FnMut(&Path, &str) -> Result<GitConfig>,
173 /// Credential source for authenticated transports.
174 pub credentials: &'a mut dyn CredentialProvider,
175 /// Progress sink for fetch progress/prune notices.
176 pub progress: &'a mut dyn ProgressSink,
177}
178
179/// Clone the resolved `source` into a fresh repository at `destination`.
180///
181/// Performs the transport-shaped core the CLI's `clone_http_repository` and the
182/// inline local clone path shared: initializes the repository, invokes
183/// `configure` to let the caller write the new repo's config (returning the
184/// [`GitConfig`] to fetch against), fetches the configured refs (reusing
185/// [`crate::fetch::fetch`] with clone's fixed options), creates the local
186/// `checkout_branch` at its fetched remote tip, invokes `configure_branch` to let
187/// the caller write the branch's upstream config (returning the [`GitConfig`] to
188/// check out against), points `refs/remotes/<origin>/HEAD` at the remote default
189/// branch when appropriate, and checks out the worktree.
190///
191/// `configure` runs right after init (before the fetch) and must return the
192/// repository config; `configure_branch` runs right after the local branch is
193/// created (before the worktree checkout) and must return the config used for
194/// checkout. Splitting the config writes into these callbacks keeps the CLI's
195/// config I/O helpers (which depend on CLI-specific config serialization and
196/// templates) out of the library while preserving their ordering in the flow.
197///
198/// Emits any library-side progress through `progress` and returns the structured
199/// [`CloneOutcome`]; never prints, mutates the process CWD, or returns
200/// `GitError::Exit`. A missing `refs/remotes/<origin>/<checkout_branch>` after the
201/// fetch is reported as [`GitError::NotFound`] for the caller to map (the CLI
202/// turns an explicit `--branch` miss into its own message).
203pub fn clone(request: CloneRequest<'_>, services: CloneServices<'_>) -> Result<CloneOutcome> {
204 let layout = RepositoryBootstrap::init(InitOptions {
205 git_dir_override: request.git_dir_override.map(Path::to_path_buf),
206 core_worktree: request.core_worktree.map(str::to_string),
207 worktree: request.destination.to_path_buf(),
208 object_format: request.format,
209 object_format_explicit: false,
210 bare: false,
211 initial_branch: CLONE_UNBORN_BRANCH.into(),
212 template_dir: None,
213 copy_template_config: false,
214 separate_git_dir: None,
215 shared_repository: None,
216 ref_storage: request.options.ref_storage,
217 ref_storage_explicit: request.options.ref_storage != RefStorageFormat::Files,
218 })?;
219 let git_dir = layout.git_dir;
220
221 let config = (services.configure)(&git_dir)?;
222 crate::protocol::check_transport_allowed(
223 scheme_for_clone_source(request.source),
224 Some(&config),
225 None,
226 )
227 .map_err(crate::protocol::transport_policy_git_error)?;
228 let fetch_source = match request.source {
229 #[cfg(feature = "http")]
230 CloneSource::Http(remote) => FetchSource::Http(remote.clone()),
231 #[cfg(not(feature = "http"))]
232 CloneSource::Http(_) => {
233 return Err(GitError::Unsupported(
234 "HTTP transport is not enabled in this build".into(),
235 ));
236 }
237 CloneSource::Ssh(remote) => FetchSource::Ssh(remote.clone()),
238 CloneSource::Git {
239 remote,
240 protocol_v2,
241 } => FetchSource::Git {
242 remote: remote.clone(),
243 protocol_v2: *protocol_v2,
244 },
245 CloneSource::Local {
246 git_dir: remote_git_dir,
247 common_git_dir: remote_common_git_dir,
248 } => FetchSource::Local {
249 git_dir: remote_git_dir.clone(),
250 common_git_dir: remote_common_git_dir.clone(),
251 },
252 };
253 let fetch_options = clone_fetch_options(
254 request.options.depth,
255 request.options.deepen_since,
256 request.options.deepen_not.clone(),
257 request.options.filter.clone(),
258 !request.options.checkout,
259 request.options.ssh_options,
260 );
261 fetch(
262 crate::fetch::FetchRequest {
263 git_dir: &git_dir,
264 format: request.format,
265 config: &config,
266 remote_name: request.options.origin,
267 source: &fetch_source,
268 refspecs: &[],
269 options: &fetch_options,
270 },
271 crate::fetch::FetchServices {
272 credentials: services.credentials,
273 progress: services.progress,
274 ref_hook: None,
275 },
276 )?;
277
278 let store = FileRefStore::new(&git_dir, request.format);
279 if let Some(detached) = &request.options.detached_head {
280 write_clone_remote_head(&store, request.options)?;
281 if request.options.checkout {
282 sley_worktree::checkout_detached_filtered(
283 request.destination,
284 &git_dir,
285 request.format,
286 detached,
287 request.options.committer.clone(),
288 b"clone: checkout".to_vec(),
289 &config,
290 )?;
291 } else {
292 let mut tx = store.transaction();
293 tx.update(RefUpdate {
294 name: "HEAD".to_string(),
295 expected: None,
296 new: RefTarget::Direct(*detached),
297 reflog: None,
298 });
299 tx.commit()?;
300 }
301 return Ok(CloneOutcome {
302 git_dir,
303 branch_oid: Some(*detached),
304 empty: false,
305 });
306 }
307 let remote_branch_ref = format!(
308 "refs/remotes/{}/{}",
309 request.options.origin, request.options.checkout_branch
310 );
311 let branch_oid = match store.read_ref(&remote_branch_ref)? {
312 Some(RefTarget::Direct(oid)) => oid,
313 Some(RefTarget::Symbolic(_)) => {
314 return Err(GitError::Unsupported(
315 "clone remote-tracking branch must be direct".into(),
316 ));
317 }
318 None => {
319 // The remote advertised no tip for the branch we are tracking. When
320 // the caller did not request an explicit branch this is an
321 // empty/unborn-repository clone: upstream `builtin/clone.c` warns,
322 // skips the checkout, and leaves `HEAD` as an unborn symref pointing
323 // at the remote's (or local default) branch — `update_head`'s
324 // `unborn` arm. We mirror that by setting `HEAD` and returning a
325 // marker for the CLI to print the warning. An explicit-branch miss
326 // is still a hard error (the CLI maps it to git's "Remote branch …
327 // not found" message).
328 if request.options.branch_explicit {
329 return Err(GitError::reference_not_found(format!(
330 "remote ref {remote_branch_ref}"
331 )));
332 }
333 let unborn = format!("refs/heads/{}", request.options.checkout_branch);
334 let mut tx = store.transaction();
335 tx.update(RefUpdate {
336 name: "HEAD".to_string(),
337 expected: None,
338 new: RefTarget::Symbolic(unborn),
339 reflog: None,
340 });
341 tx.commit()?;
342 // Install branch upstream config for the unborn branch, matching
343 // git's `install_branch_config` in the unborn path.
344 (services.configure_branch)(&git_dir, request.options.checkout_branch)?;
345 return Ok(CloneOutcome {
346 git_dir,
347 branch_oid: None,
348 empty: true,
349 });
350 }
351 };
352 store.create_branch(
353 request.options.checkout_branch,
354 branch_oid.clone(),
355 request.options.committer.clone(),
356 format!(
357 "branch: Created from {}/{}",
358 request.options.origin, request.options.checkout_branch
359 )
360 .into_bytes(),
361 )?;
362 // The branch upstream config is written here and the resulting config is used
363 // for the checkout below, matching the CLI's previous order: configure the
364 // branch, point the remote `HEAD`, then read the (now final) config for the
365 // smudge-side checkout filters. Pointing `HEAD` only updates refs, so it does
366 // not change the config `configure_branch` returns.
367 let checkout_config = (services.configure_branch)(&git_dir, request.options.checkout_branch)?;
368 if request.options.checkout {
369 fetch_local_partial_clone_checkout_blobs(&request, &git_dir, branch_oid)?;
370 } else {
371 let mut tx = store.transaction();
372 tx.update(RefUpdate {
373 name: "HEAD".to_string(),
374 expected: None,
375 new: RefTarget::Symbolic(format!("refs/heads/{}", request.options.checkout_branch)),
376 reflog: None,
377 });
378 tx.commit()?;
379 }
380 write_clone_remote_head(&store, request.options)?;
381
382 if request.options.checkout {
383 sley_worktree::checkout_branch_filtered(
384 request.destination,
385 &git_dir,
386 request.format,
387 request.options.checkout_branch,
388 request.options.committer.clone(),
389 &checkout_config,
390 )?;
391 }
392
393 Ok(CloneOutcome {
394 git_dir,
395 branch_oid: Some(branch_oid),
396 empty: false,
397 })
398}
399
400fn write_clone_remote_head(store: &FileRefStore, options: &CloneOptions<'_>) -> Result<()> {
401 if options.remote_head_branch.is_empty()
402 || (options.single_branch && options.checkout_branch != options.remote_head_branch)
403 {
404 return Ok(());
405 }
406 let mut tx = store.transaction();
407 tx.update(RefUpdate {
408 name: format!("refs/remotes/{}/HEAD", options.origin),
409 expected: None,
410 new: RefTarget::Symbolic(format!(
411 "refs/remotes/{}/{}",
412 options.origin, options.remote_head_branch
413 )),
414 reflog: None,
415 });
416 tx.commit()
417}
418
419fn scheme_for_clone_source(source: &CloneSource) -> &'static str {
420 match source {
421 CloneSource::Http(remote) => crate::protocol::transport_scheme_for_remote(remote),
422 CloneSource::Ssh(remote) => crate::protocol::transport_scheme_for_remote(remote),
423 CloneSource::Git { remote, .. } => crate::protocol::transport_scheme_for_remote(remote),
424 CloneSource::Local { .. } => "file",
425 }
426}
427
428fn fetch_local_partial_clone_checkout_blobs(
429 request: &CloneRequest<'_>,
430 git_dir: &Path,
431 commit_oid: ObjectId,
432) -> Result<()> {
433 if request.options.filter.is_none() {
434 return Ok(());
435 }
436 let CloneSource::Local {
437 git_dir: remote_git_dir,
438 common_git_dir: remote_common_git_dir,
439 } = request.source
440 else {
441 return Ok(());
442 };
443
444 let local_db = FileObjectDatabase::from_git_dir(git_dir, request.format);
445 let remote_db = FileObjectDatabase::from_git_dir(remote_common_git_dir, request.format);
446 let mut wants = Vec::new();
447 let mut seen = std::collections::HashSet::new();
448 collect_checkout_materialization_wants(
449 &remote_db,
450 &local_db,
451 request.format,
452 commit_oid,
453 &mut seen,
454 &mut wants,
455 )?;
456 crate::local::install_fetch_pack_via_local_upload_pack(
457 git_dir,
458 remote_git_dir,
459 request.format,
460 wants,
461 None,
462 true,
463 false,
464 None,
465 false,
466 None,
467 )?;
468 Ok(())
469}
470
471fn collect_checkout_materialization_wants(
472 remote_db: &FileObjectDatabase,
473 local_db: &FileObjectDatabase,
474 format: ObjectFormat,
475 commit_oid: ObjectId,
476 seen: &mut std::collections::HashSet<ObjectId>,
477 wants: &mut Vec<ObjectId>,
478) -> Result<()> {
479 let commit_object = remote_db.read_object(&commit_oid)?;
480 if commit_object.object_type != ObjectType::Commit {
481 return Err(GitError::InvalidObject(format!(
482 "expected commit {commit_oid}, found {}",
483 commit_object.object_type.as_str()
484 )));
485 }
486 let commit = Commit::parse_ref(format, &commit_object.body)?;
487 collect_tree_materialization_wants(remote_db, local_db, format, commit.tree, seen, wants)
488}
489
490fn collect_tree_materialization_wants(
491 remote_db: &FileObjectDatabase,
492 local_db: &FileObjectDatabase,
493 format: ObjectFormat,
494 tree_oid: ObjectId,
495 seen: &mut std::collections::HashSet<ObjectId>,
496 wants: &mut Vec<ObjectId>,
497) -> Result<()> {
498 if !seen.insert(tree_oid) {
499 return Ok(());
500 }
501 if !local_db.contains(&tree_oid)? {
502 wants.push(tree_oid);
503 }
504 let tree_object = remote_db.read_object(&tree_oid)?;
505 if tree_object.object_type != ObjectType::Tree {
506 return Err(GitError::InvalidObject(format!(
507 "expected tree {tree_oid}, found {}",
508 tree_object.object_type.as_str()
509 )));
510 }
511 for entry in Tree::parse(format, &tree_object.body)?.entries {
512 if entry.is_tree() {
513 collect_tree_materialization_wants(
514 remote_db, local_db, format, entry.oid, seen, wants,
515 )?;
516 } else if !entry.is_gitlink() {
517 if seen.insert(entry.oid) && !local_db.contains(&entry.oid)? {
518 wants.push(entry.oid);
519 }
520 }
521 }
522 Ok(())
523}
524
525/// The fixed [`FetchOptions`] a clone fetch uses: quiet, auto-follow tags, write
526/// `FETCH_HEAD`, the requested shallow `depth`, and otherwise neutral (no prune, no
527/// `--tags`, not a dry run, not appending). Mirrors the options the CLI's clone
528/// paths passed.
529fn clone_fetch_options(
530 depth: Option<u32>,
531 deepen_since: Option<i64>,
532 deepen_not: Vec<String>,
533 filter: Option<sley_odb::PackObjectFilter>,
534 record_promisor_refs: bool,
535 ssh_options: Option<crate::ssh::SshTransportOptions>,
536) -> FetchOptions {
537 FetchOptions {
538 quiet: true,
539 auto_follow_tags: true,
540 fetch_all_tags: false,
541 prune: false,
542 prune_tags: false,
543 dry_run: false,
544 force: false,
545 append: false,
546 write_fetch_head: true,
547 tag_option_explicit: false,
548 prune_option_explicit: false,
549 prune_tags_option_explicit: false,
550 refmap: None,
551 depth,
552 merge_srcs: Vec::new(),
553 filter,
554 refetch: false,
555 cloning: true,
556 record_promisor_refs,
557 update_shallow: false,
558 deepen_relative: false,
559 update_head_ok: false,
560 deepen_since,
561 deepen_not,
562 ssh_options,
563 atomic: false,
564 }
565}