1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
//! `git lfs pull [<remote>] [<ref>...]` — `fetch` + materialize LFS
//! files in the working tree.
//!
//! After `fetch` populates the store, walk HEAD's tree to find every
//! tracked LFS pointer and rewrite the working-tree file with its
//! content from the store. Walking the tree (rather than `git ls-files`)
//! handles the "user `rm`'d the file" case — `git lfs pull` should
//! restore deleted tracked files from the store, matching upstream.
//!
//! Doing the rewrite ourselves (rather than `git checkout HEAD -- .`)
//! is deliberate: `git checkout` skips files it considers "unchanged"
//! relative to the index — and a pointer text that's also what's in
//! the index counts as unchanged. We'd never re-trigger the smudge
//! filter that way.
use std::io::Write;
use std::path::Path;
use std::process::{Command, Stdio};
use git_lfs_filter::{SmudgeError, smudge_object_to};
use git_lfs_git::scanner::scan_index_lfs;
use git_lfs_pointer::Pointer;
use git_lfs_store::Store;
use crate::collect_smudge_extensions;
use crate::fetch::{self, FetchCommandError};
/// Errors surfaced by `git lfs pull`.
#[derive(Debug, thiserror::Error)]
pub enum PullCommandError {
    /// The underlying `fetch` phase failed outright.
    #[error(transparent)]
    Fetch(#[from] FetchCommandError),
    /// A git plumbing call reported an error.
    #[error(transparent)]
    Git(#[from] git_lfs_git::Error),
    /// Unexpected filesystem / process I/O failure.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// A spawned git command (e.g. `rev-parse`) exited non-zero;
    /// carries the command's stderr text.
    #[error("git ls-files failed: {0}")]
    LsFiles(String),
    /// Some objects failed to download; the working tree is left
    /// untouched so the user can rerun after fixing connectivity.
    #[error("partial pull: {0} object(s) failed to fetch — working tree not updated")]
    FetchFailures(usize),
    /// The smudge filter failed while materializing a file.
    #[error("smudge: {0}")]
    Smudge(#[from] SmudgeError),
}
/// `git lfs pull [<remote>] [<ref>...]`: run a full `fetch`, then
/// materialize every LFS-tracked pointer in the working tree from the
/// local object store.
///
/// `args` are the remaining CLI arguments (remote / refs) forwarded to
/// `fetch`; `include` / `exclude` are the `-I` / `-X` path filters,
/// applied both to the fetch and to the working-tree rewrite so the
/// two phases skip the same paths.
///
/// # Errors
/// Returns `FetchFailures` (working tree untouched) when any object
/// failed to download, and propagates fetch, git, I/O, and smudge
/// errors. Per-path conflicts (symlinks, non-directories in the way,
/// permission-denied destinations) are warned about and skipped, not
/// returned as errors — matching upstream behavior.
pub fn pull_with_filter(
    cwd: &Path,
    args: &[String],
    include: &[String],
    exclude: &[String],
) -> Result<(), PullCommandError> {
    let opts = fetch::FetchOptions {
        args,
        stdin_lines: &[],
        dry_run: false,
        json: false,
        all: false,
        refetch: false,
        stdin: false,
        prune: false,
        recent: false,
        include,
        exclude,
    };
    let outcome = fetch::fetch(cwd, &opts)?;
    if !outcome.report.failed.is_empty() {
        return Err(PullCommandError::FetchFailures(outcome.report.failed.len()));
    }
    // Match upstream `newSingleCheckout`: if the smudge filter isn't
    // installed (no `filter.lfs.clean` config), skip the working-
    // tree materialize step and tell the user how to fix it. The
    // fetch above still ran, so objects land in `.git/lfs/objects/`
    // and `git lfs install` later will smudge them in.
    if !smudge_filter_installed(cwd) {
        println!(
            "Skipping object checkout, Git LFS is not installed for this repository.\n\
            Consider installing it with 'git lfs install'."
        );
        return Ok(());
    }
    // Bare repos have no working tree, so the materialize phase is a
    // no-op. Fetch already ran above; we're done.
    if is_bare_repo(cwd) {
        return Ok(());
    }
    // Build the same include/exclude filter `fetch` used so the
    // working-tree rewrite respects -I / -X (or `lfs.fetchinclude` /
    // `lfs.fetchexclude`). Without this an LFS object that fetch
    // skipped would still be rewritten in-place if it happened to be
    // present locally already.
    let include_set = fetch::build_pattern_set(cwd, include, "lfs.fetchinclude")?;
    let exclude_set = fetch::build_pattern_set(cwd, exclude, "lfs.fetchexclude")?;
    let mut store = Store::new(git_lfs_git::lfs_dir(cwd)?)
        .with_references(git_lfs_git::lfs_alternate_dirs(cwd).unwrap_or_default());
    if let Some(v) = crate::shared_repo_config(cwd) {
        store = store.with_shared_repository(&v);
    }
    let repo_root = repo_root(cwd)?;
    // Pointer extensions (case-inverter, encryption shims, etc.) need
    // to run on the way out of the store. Resolve once before the
    // walk; non-extension files take the direct-copy fast path inside
    // the loop.
    let smudge_extensions = collect_smudge_extensions(cwd);
    // Walk via the index (`git ls-files :(attr:filter=lfs)`) instead
    // of HEAD's tree: respects sparse-checkout (don't materialize
    // out-of-cone files) and reads what's *staged*, matching what a
    // checkout would actually smudge.
    let pointers = scan_index_lfs(&repo_root)?;
    // Paths actually rewritten; fed to `git update-index --refresh`
    // at the end so stat info in the index is fresh again.
    let mut rewritten_paths: Vec<String> = Vec::new();
    for p in &pointers {
        // Empty pointers (size 0) come from genuinely empty files in
        // the index — git stores those under the empty-blob hash, and
        // `Pointer::parse` of empty bytes is Ok(empty()). There's
        // nothing to materialize; touching the working-tree file
        // would needlessly bump mtime (test 17).
        if p.size == 0 {
            continue;
        }
        // Iterate every working-tree path the same LFS object lives
        // at — `scan_index_lfs` dedups by LFS OID, but a deduped
        // entry can have multiple paths (`dir1/dir.dat` and
        // `dir2/dir.dat` sharing the same LFS pointer text).
        for rel in &p.paths {
            if !fetch::path_passes_filter(Some(rel), &include_set, &exclude_set) {
                continue;
            }
            let rel_str = rel.to_string_lossy();
            let dst = repo_root.join(rel);
            // Walk the parent path components. If any is a regular
            // file or symlink, refuse to write through it — matches
            // upstream's "skip and warn" behavior on dir/file/symlink
            // conflicts.
            if let Some(rel_parent) = rel.parent()
                && !rel_parent.as_os_str().is_empty()
                && let Err(msg) = check_safe_parent(&repo_root, rel_parent)
            {
                println!("{rel_str:?}: {msg}");
                continue;
            }
            // Destination policy. Symlink at the destination is a
            // "not a regular file" warning (we won't overwrite a
            // symlink). For regular files, mirror checkout / upstream's
            // `singleCheckout.Run`: leave alone raw content or
            // different-OID pointers; materialize over our own
            // pointer. Capture permissions of the existing file so a
            // read-only pointer text → read-only smudged content
            // (test 16).
            let mut preserved_perms: Option<std::fs::Permissions> = None;
            match std::fs::symlink_metadata(&dst) {
                Ok(meta) if meta.file_type().is_symlink() => {
                    println!("{rel_str:?}: not a regular file");
                    continue;
                }
                Ok(meta) if meta.is_file() => {
                    preserved_perms = Some(meta.permissions());
                    match std::fs::read(&dst) {
                        Ok(bytes) => match Pointer::parse(&bytes) {
                            // Our own pointer text: safe to overwrite.
                            Ok(existing) if existing.oid == p.oid => {}
                            // Different-OID pointer: leave it alone.
                            Ok(_) => continue,
                            // Raw (already-smudged or edited) content:
                            // leave it alone.
                            Err(_) => continue,
                        },
                        Err(e) => return Err(e.into()),
                    }
                }
                Ok(_) => {
                    // Some other file type (dir, fifo, …). Skip.
                    println!("{rel_str:?}: not a regular file");
                    continue;
                }
                // No file at the destination: restore it (the user
                // may have `rm`'d it).
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
                Err(e) => return Err(e.into()),
            }
            if !store.contains_with_size(p.oid, p.size) {
                // Object missing locally (fetch failed or skipped).
                // Leave whatever is on disk alone.
                continue;
            }
            if let Some(parent) = dst.parent()
                && let Err(_e) = std::fs::create_dir_all(parent)
            {
                println!("{rel_str:?}: not a directory");
                continue;
            }
            // Unlink the existing file (if any) before recreating:
            // this works around a read-only existing file (we only
            // need write permission on the parent directory). Ignore
            // NotFound.
            match std::fs::remove_file(&dst) {
                Ok(_) => {}
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
                Err(e) => return Err(e.into()),
            }
            let mut out = match std::fs::File::create(&dst) {
                Ok(f) => f,
                Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
                    // Read-only parent directory (test 15): warn in
                    // the upstream-compatible format and keep going.
                    // Object is in the local store; user can chmod
                    // and rerun.
                    println!("could not check out {rel_str:?}");
                    println!("could not create working directory file");
                    println!("permission denied");
                    continue;
                }
                Err(e) => return Err(e.into()),
            };
            // Reconstruct the pointer from the index entry and route
            // through the smudge filter — for plain pointers this is
            // a direct store→file copy; pointers with extensions get
            // their chain run from the work-tree root (so case-
            // inverter / encryption shims find `.git/`, even when
            // pull was invoked from a subdirectory).
            let pointer = Pointer {
                oid: p.oid,
                size: p.size,
                extensions: p.extensions.clone(),
                canonical: true,
            };
            smudge_object_to(
                &store,
                &pointer,
                &mut out,
                &rel_str,
                &smudge_extensions,
                Some(&repo_root),
            )?;
            if let Some(perms) = preserved_perms {
                // Restore the original mode so a chmod-a-w pointer
                // remains read-only after we materialize.
                let _ = std::fs::set_permissions(&dst, perms);
            }
            rewritten_paths.push(rel.to_string_lossy().into_owned());
        }
    }
    if !rewritten_paths.is_empty() {
        // After overwriting working-tree files, the stat info in the
        // index is stale; `git diff-index HEAD` would report each as
        // modified even though `clean(content)` hashes back to the
        // original blob. `git update-index -q --refresh --stdin`
        // re-stats each path and runs the clean filter to confirm
        // the content blob matches; matching paths get fresh stat
        // info and drop out of subsequent diff-index walks.
        refresh_index(&repo_root, &rewritten_paths)?;
    }
    Ok(())
}
/// Walk the parent components of a repo-relative path. If any
/// component is a regular file, symlink, or some other non-directory,
/// return the upstream-formatted "not a directory" string so the
/// caller can emit `"path": not a directory` and skip. Stops at the
/// first non-existent component (we'd `create_dir_all` from there).
fn check_safe_parent(repo_root: &Path, rel_parent: &Path) -> Result<(), &'static str> {
    let mut current = repo_root.to_path_buf();
    for comp in rel_parent.components() {
        current.push(comp);
        // `symlink_metadata` (not `metadata`) so a symlinked component
        // is reported as a symlink rather than followed to its target.
        match std::fs::symlink_metadata(&current) {
            Ok(meta) => {
                let ft = meta.file_type();
                if ft.is_symlink() || !ft.is_dir() {
                    return Err("not a directory");
                }
            }
            // First missing component: everything from here down will
            // be created by `create_dir_all`, so the path is safe.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
            // Any other stat failure (e.g. permissions) — treat as
            // unsafe and let the caller warn + skip.
            Err(_) => return Err("not a directory"),
        }
    }
    Ok(())
}
/// Resolve the working tree's top-level directory via
/// `git rev-parse --show-toplevel`, run from `cwd`.
///
/// A non-zero exit is reported through the `LsFiles` variant (this
/// module's generic "git plumbing failed" bucket) with the command's
/// trimmed stderr attached.
fn repo_root(cwd: &Path) -> Result<std::path::PathBuf, PullCommandError> {
    let output = Command::new("git")
        .arg("-C")
        .arg(cwd)
        .args(["rev-parse", "--show-toplevel"])
        .output()?;
    if output.status.success() {
        // git prints the path followed by a newline; trim it off.
        let top = String::from_utf8_lossy(&output.stdout).trim().to_owned();
        Ok(std::path::PathBuf::from(top))
    } else {
        let stderr = String::from_utf8_lossy(&output.stderr);
        Err(PullCommandError::LsFiles(format!(
            "git rev-parse failed: {}",
            stderr.trim()
        )))
    }
}
/// Re-stat `paths` in the index via `git update-index -q --refresh
/// --stdin` so files we just rewrote stop showing as modified in
/// `git diff-index HEAD` (the clean filter hashes them back to the
/// original blob, refreshing their stat info).
fn refresh_index(cwd: &Path, paths: &[String]) -> Result<(), PullCommandError> {
    let mut child = Command::new("git")
        .arg("-C")
        .arg(cwd)
        .args(["update-index", "-q", "--refresh", "--stdin"])
        .stdin(Stdio::piped())
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .spawn()?;
    if let Some(pipe) = child.stdin.as_mut() {
        // `--stdin` expects one path per line; build the whole
        // payload up front and write it in a single call.
        let mut payload = String::new();
        for path in paths {
            payload.push_str(path);
            payload.push('\n');
        }
        pipe.write_all(payload.as_bytes())?;
    }
    // Don't surface failures: `update-index --refresh` exits non-zero
    // when *some* path is still considered dirty (e.g. genuine local
    // edits we didn't rewrite), and treating that as a hard error
    // would break the legitimate "clean partial pull" case.
    let _ = child.wait()?;
    Ok(())
}
/// True when `git rev-parse --is-bare-repository` prints `true` from
/// `cwd`. Any failure to spawn git, or a non-zero exit, is treated as
/// "not bare" — callers then just attempt the working-tree phase.
fn is_bare_repo(cwd: &Path) -> bool {
    let result = Command::new("git")
        .arg("-C")
        .arg(cwd)
        .args(["rev-parse", "--is-bare-repository"])
        .output();
    match result {
        Ok(o) if o.status.success() => o.stdout.trim_ascii() == b"true",
        _ => false,
    }
}
/// True when `filter.lfs.clean` is set in git config for the repo at
/// `cwd`, i.e. the LFS filter has been installed. A failed spawn, a
/// non-zero exit (key absent), or an empty value all count as "not
/// installed".
fn smudge_filter_installed(cwd: &Path) -> bool {
    match Command::new("git")
        .arg("-C")
        .arg(cwd)
        .args(["config", "--get", "filter.lfs.clean"])
        .output()
    {
        Ok(o) => o.status.success() && !o.stdout.trim_ascii().is_empty(),
        Err(_) => false,
    }
}