1use std::io::{BufRead, BufReader};
19use std::path::{Path, PathBuf};
20use std::process::{Command, Stdio};
21use std::time::{SystemTime, UNIX_EPOCH};
22
23use git_lfs_pointer::{Extension, MAX_POINTER_SIZE, Oid, Pointer};
24
25use crate::Error;
26use crate::cat_file::{CatFileBatch, CatFileBatchCheck, CatFileHeader};
27
/// One Git LFS pointer found by a scan, together with where it was seen.
#[derive(Debug, Clone)]
pub struct PointerEntry {
    /// Object id taken from the parsed pointer.
    pub oid: Oid,
    /// Object size taken from the parsed pointer.
    pub size: u64,
    /// Path the pointer was first seen at, when the scan knows one.
    pub path: Option<PathBuf>,
    /// Paths recorded for this pointer. Scans that de-duplicate by `oid`
    /// append every additional distinct path here; other scans store only
    /// the single path also held in `path`.
    pub paths: Vec<PathBuf>,
    /// Copied from the parsed pointer's `canonical` field.
    pub canonical: bool,
    /// Copied from the parsed pointer's `extensions` field.
    pub extensions: Vec<Extension>,
}
58
59pub fn scan_pointers(
71 cwd: &Path,
72 include: &[&str],
73 exclude: &[&str],
74) -> Result<Vec<PointerEntry>, Error> {
75 scan_pointers_with_args(cwd, include, exclude, &[])
76}
77
78pub fn scan_pointers_with_args(
81 cwd: &Path,
82 include: &[&str],
83 exclude: &[&str],
84 extra_cmdline_args: &[&str],
85) -> Result<Vec<PointerEntry>, Error> {
86 let entries = crate::rev_list::rev_list_with_args(cwd, include, exclude, extra_cmdline_args)?;
87
88 let mut bcheck = CatFileBatchCheck::spawn(cwd)?;
91 let mut candidates: Vec<(String, Option<String>)> = Vec::new();
92 for entry in entries {
93 match bcheck.check(&entry.oid)? {
94 CatFileHeader::Found { kind, size, .. }
95 if kind == "blob" && (size as usize) < MAX_POINTER_SIZE =>
96 {
97 candidates.push((entry.oid, entry.name));
98 }
99 _ => {}
101 }
102 }
103 drop(bcheck);
104
105 let mut batch = CatFileBatch::spawn(cwd)?;
110 let mut by_oid: std::collections::HashMap<Oid, usize> = std::collections::HashMap::new();
111 let mut out: Vec<PointerEntry> = Vec::new();
112 for (oid, name) in candidates {
113 let Some(blob) = batch.read(&oid)? else {
114 continue;
115 };
116 let Ok(pointer) = Pointer::parse(&blob.content) else {
117 continue;
118 };
119 let path_buf = name.map(PathBuf::from);
120 if let Some(&idx) = by_oid.get(&pointer.oid) {
121 if let Some(p) = path_buf
122 && !out[idx].paths.contains(&p)
123 {
124 out[idx].paths.push(p);
125 }
126 continue;
127 }
128 let paths: Vec<PathBuf> = path_buf.iter().cloned().collect();
129 by_oid.insert(pointer.oid, out.len());
130 out.push(PointerEntry {
131 oid: pointer.oid,
132 size: pointer.size,
133 path: path_buf,
134 paths,
135 canonical: pointer.canonical,
136 extensions: pointer.extensions.clone(),
137 });
138 }
139 Ok(out)
140}
141
142pub fn scan_index_lfs(cwd: &Path) -> Result<Vec<PointerEntry>, Error> {
156 let scan_cwd = match crate::run_git(cwd, &["rev-parse", "--show-toplevel"]) {
162 Ok(s) if !s.is_empty() => PathBuf::from(s),
163 _ => crate::run_git(cwd, &["rev-parse", "--absolute-git-dir"])
164 .map(PathBuf::from)
165 .unwrap_or_else(|_| cwd.to_path_buf()),
166 };
167 let filter_by_parent_dir = is_bare_repo(&scan_cwd) || is_sparse_checkout(&scan_cwd);
174
175 let out = Command::new("git")
176 .arg("-C")
177 .arg(&scan_cwd)
178 .args(["ls-files", "--stage", "-z", "--", ":(attr:filter=lfs)"])
179 .output()?;
180 if !out.status.success() {
181 return Err(Error::Failed(
182 String::from_utf8_lossy(&out.stderr).trim().to_owned(),
183 ));
184 }
185
186 let mut candidates: Vec<(String, PathBuf)> = Vec::new();
187 for record in out.stdout.split(|&b| b == 0).filter(|s| !s.is_empty()) {
188 let s = match std::str::from_utf8(record) {
189 Ok(s) => s,
190 Err(_) => continue,
191 };
192 let Some((meta, path)) = s.split_once('\t') else {
194 continue;
195 };
196 let parts: Vec<&str> = meta.split_whitespace().collect();
197 if parts.len() < 3 {
198 continue;
199 }
200 let mode = parts[0];
201 let oid = parts[1];
202 if mode == "120000" {
203 continue;
204 }
205 let path = PathBuf::from(path);
206 if filter_by_parent_dir
215 && let Some(parent) = path.parent()
216 && !parent.as_os_str().is_empty()
217 && !scan_cwd.join(parent).is_dir()
218 {
219 continue;
220 }
221 candidates.push((oid.to_string(), path));
222 }
223 if candidates.is_empty() {
224 return Ok(Vec::new());
225 }
226
227 let mut batch = CatFileBatch::spawn(cwd)?;
228 let mut by_oid: std::collections::HashMap<Oid, usize> = std::collections::HashMap::new();
229 let mut out: Vec<PointerEntry> = Vec::new();
230 for (oid, path) in candidates {
231 let Some(blob) = batch.read(&oid)? else {
232 continue;
233 };
234 let Ok(pointer) = Pointer::parse(&blob.content) else {
235 continue;
236 };
237 if let Some(&idx) = by_oid.get(&pointer.oid) {
238 if !out[idx].paths.contains(&path) {
239 out[idx].paths.push(path);
240 }
241 continue;
242 }
243 by_oid.insert(pointer.oid, out.len());
244 out.push(PointerEntry {
245 oid: pointer.oid,
246 size: pointer.size,
247 path: Some(path.clone()),
248 paths: vec![path],
249 canonical: pointer.canonical,
250 extensions: pointer.extensions.clone(),
251 });
252 }
253 Ok(out)
254}
255
256fn is_bare_repo(cwd: &Path) -> bool {
257 crate::run_git(cwd, &["rev-parse", "--is-bare-repository"])
258 .map(|s| s.trim() == "true")
259 .unwrap_or(false)
260}
261
262fn is_sparse_checkout(cwd: &Path) -> bool {
263 crate::run_git(cwd, &["config", "--get", "core.sparseCheckout"])
264 .map(|s| s.trim().eq_ignore_ascii_case("true"))
265 .unwrap_or(false)
266}
267
/// One blob entry listed by `git ls-tree`, with its size attached.
#[derive(Debug, Clone)]
pub struct TreeBlob {
    /// Path of the entry as printed by `ls-tree`.
    pub path: PathBuf,
    /// Git object id of the blob, as printed by `ls-tree`.
    pub blob_oid: String,
    /// Blob size reported by the `cat-file` batch-check header.
    pub size: u64,
    /// File mode string as printed by `ls-tree`.
    pub mode: String,
}
284
285pub fn scan_tree_blobs(cwd: &Path, reference: &str) -> Result<Vec<TreeBlob>, Error> {
289 if reference.contains("..") {
294 return scan_blobs_in_range(cwd, reference);
295 }
296 scan_tree_blobs_for_ref(cwd, reference)
297}
298
299fn scan_tree_blobs_for_ref(cwd: &Path, reference: &str) -> Result<Vec<TreeBlob>, Error> {
300 let out = Command::new("git")
301 .arg("-C")
302 .arg(cwd)
303 .args(["ls-tree", "--full-tree", "-r", "-z", reference])
304 .output()?;
305 if !out.status.success() {
306 return Err(Error::Failed(format!(
307 "git ls-tree failed: {}",
308 String::from_utf8_lossy(&out.stderr).trim()
309 )));
310 }
311 let mut bcheck = CatFileBatchCheck::spawn(cwd)?;
312 let mut blobs = Vec::new();
313 for record in out.stdout.split(|&b| b == 0).filter(|s| !s.is_empty()) {
314 let s = std::str::from_utf8(record)
315 .map_err(|e| Error::Failed(format!("ls-tree: non-utf8 record: {e}")))?;
316 let (header, path) = s
317 .split_once('\t')
318 .ok_or_else(|| Error::Failed(format!("ls-tree: malformed record {s:?}")))?;
319 let mut parts = header.split_whitespace();
320 let mode = parts
321 .next()
322 .ok_or_else(|| Error::Failed(format!("ls-tree: missing mode in {s:?}")))?;
323 let kind = parts.next();
324 let oid = parts
325 .next()
326 .ok_or_else(|| Error::Failed(format!("ls-tree: missing oid in {s:?}")))?;
327 if kind != Some("blob") {
328 continue;
329 }
330 if let CatFileHeader::Found { kind, size, .. } = bcheck.check(oid)?
331 && kind == "blob"
332 {
333 blobs.push(TreeBlob {
334 path: PathBuf::from(path),
335 blob_oid: oid.to_owned(),
336 size,
337 mode: mode.to_owned(),
338 });
339 }
340 }
341 Ok(blobs)
342}
343
344fn scan_blobs_in_range(cwd: &Path, range: &str) -> Result<Vec<TreeBlob>, Error> {
349 let out = Command::new("git")
350 .arg("-C")
351 .arg(cwd)
352 .args(["rev-list", range])
353 .output()?;
354 if !out.status.success() {
355 return Err(Error::Failed(format!(
356 "git rev-list failed: {}",
357 String::from_utf8_lossy(&out.stderr).trim()
358 )));
359 }
360 let mut seen: std::collections::HashSet<(PathBuf, String)> = std::collections::HashSet::new();
361 let mut all = Vec::new();
362 for line in String::from_utf8_lossy(&out.stdout).lines() {
363 let commit = line.trim();
364 if commit.is_empty() {
365 continue;
366 }
367 for blob in scan_tree_blobs_for_ref(cwd, commit)? {
368 if seen.insert((blob.path.clone(), blob.blob_oid.clone())) {
369 all.push(blob);
370 }
371 }
372 }
373 Ok(all)
374}
375
376pub fn scan_tree(cwd: &Path, reference: &str) -> Result<Vec<PointerEntry>, Error> {
388 let out = Command::new("git")
389 .arg("-C")
390 .arg(cwd)
391 .args(["ls-tree", "--full-tree", "-r", "-z", reference])
392 .output()?;
393 if !out.status.success() {
394 return Err(Error::Failed(format!(
395 "git ls-tree failed: {}",
396 String::from_utf8_lossy(&out.stderr).trim()
397 )));
398 }
399
400 let mut bcheck = CatFileBatchCheck::spawn(cwd)?;
403 let mut candidates: Vec<(String, String)> = Vec::new();
404 for record in out.stdout.split(|&b| b == 0).filter(|s| !s.is_empty()) {
405 let s = std::str::from_utf8(record)
406 .map_err(|e| Error::Failed(format!("ls-tree: non-utf8 record: {e}")))?;
407 let (header, path) = s
408 .split_once('\t')
409 .ok_or_else(|| Error::Failed(format!("ls-tree: malformed record {s:?}")))?;
410 let mut parts = header.split_whitespace();
411 let _mode = parts.next();
412 let kind = parts.next();
413 let oid = parts
414 .next()
415 .ok_or_else(|| Error::Failed(format!("ls-tree: missing oid in {s:?}")))?;
416 if kind != Some("blob") {
417 continue;
418 }
419 if let CatFileHeader::Found { kind, size, .. } = bcheck.check(oid)?
420 && kind == "blob"
421 && (size as usize) < MAX_POINTER_SIZE
422 {
423 candidates.push((oid.to_owned(), path.to_owned()));
424 }
425 }
426 drop(bcheck);
427
428 let mut batch = CatFileBatch::spawn(cwd)?;
432 let mut entries = Vec::new();
433 for (oid, path) in candidates {
434 let Some(blob) = batch.read(&oid)? else {
435 continue;
436 };
437 let Ok(pointer) = Pointer::parse(&blob.content) else {
438 continue;
439 };
440 let path_buf = PathBuf::from(path);
441 entries.push(PointerEntry {
442 oid: pointer.oid,
443 size: pointer.size,
444 path: Some(path_buf.clone()),
445 paths: vec![path_buf],
446 canonical: pointer.canonical,
447 extensions: pointer.extensions.clone(),
448 });
449 }
450 Ok(entries)
451}
452
453pub fn scan_index_pointers(cwd: &Path, reference: &str) -> Result<Vec<PointerEntry>, Error> {
464 let scan_cwd = match crate::run_git(cwd, &["rev-parse", "--show-toplevel"]) {
465 Ok(s) if !s.is_empty() => PathBuf::from(s),
466 _ => crate::run_git(cwd, &["rev-parse", "--absolute-git-dir"])
467 .map(PathBuf::from)
468 .unwrap_or_else(|_| cwd.to_path_buf()),
469 };
470
471 let mut candidates: Vec<(String, PathBuf)> = Vec::new();
472 let mut seen: std::collections::HashSet<(String, PathBuf)> = std::collections::HashSet::new();
473 for cached_arg in [&[][..], &["--cached"][..]] {
474 let mut args = vec!["diff-index", "-z"];
475 args.extend_from_slice(cached_arg);
476 args.push(reference);
477 let out = Command::new("git")
478 .arg("-C")
479 .arg(&scan_cwd)
480 .args(&args)
481 .output()?;
482 if !out.status.success() {
483 continue;
487 }
488 let bytes = &out.stdout;
493 let mut i = 0;
494 while i < bytes.len() {
495 let meta_end = bytes[i..]
497 .iter()
498 .position(|&b| b == 0)
499 .map(|p| i + p)
500 .unwrap_or(bytes.len());
501 let Ok(meta) = std::str::from_utf8(&bytes[i..meta_end]) else {
502 i = meta_end + 1;
503 continue;
504 };
505 i = meta_end + 1;
506 let parts: Vec<&str> = meta.trim_start_matches(':').split_whitespace().collect();
508 if parts.len() < 5 {
509 continue;
510 }
511 let dst_mode = parts[1];
512 let dst_sha = parts[3];
513 let status = parts[4];
514 if dst_mode == "120000"
518 || dst_mode == "160000"
519 || status.starts_with('D')
520 || dst_sha.bytes().all(|b| b == b'0')
521 {
522 let path_count = if status.starts_with('R') || status.starts_with('C') {
524 2
525 } else {
526 1
527 };
528 for _ in 0..path_count {
529 let end = bytes[i..]
530 .iter()
531 .position(|&b| b == 0)
532 .map(|p| i + p)
533 .unwrap_or(bytes.len());
534 i = end + 1;
535 }
536 continue;
537 }
538 let path_count = if status.starts_with('R') || status.starts_with('C') {
540 2
541 } else {
542 1
543 };
544 let mut path: PathBuf = PathBuf::new();
545 for n in 0..path_count {
546 let end = bytes[i..]
547 .iter()
548 .position(|&b| b == 0)
549 .map(|p| i + p)
550 .unwrap_or(bytes.len());
551 if n + 1 == path_count {
552 path = PathBuf::from(String::from_utf8_lossy(&bytes[i..end]).into_owned());
553 }
554 i = end + 1;
555 }
556 let key = (dst_sha.to_owned(), path.clone());
557 if seen.insert(key) {
558 candidates.push((dst_sha.to_owned(), path));
559 }
560 }
561 }
562 if candidates.is_empty() {
563 return Ok(Vec::new());
564 }
565
566 let mut bcheck = CatFileBatchCheck::spawn(cwd)?;
568 let mut sized: Vec<(String, PathBuf)> = Vec::new();
569 for (oid, path) in candidates {
570 match bcheck.check(&oid)? {
571 CatFileHeader::Found { kind, size, .. }
572 if kind == "blob" && (size as usize) < MAX_POINTER_SIZE =>
573 {
574 sized.push((oid, path));
575 }
576 _ => {}
577 }
578 }
579 drop(bcheck);
580
581 let mut batch = CatFileBatch::spawn(cwd)?;
582 let mut by_oid: std::collections::HashMap<Oid, usize> = std::collections::HashMap::new();
583 let mut out: Vec<PointerEntry> = Vec::new();
584 for (oid, path) in sized {
585 let Some(blob) = batch.read(&oid)? else {
586 continue;
587 };
588 let Ok(pointer) = Pointer::parse(&blob.content) else {
589 continue;
590 };
591 if let Some(&idx) = by_oid.get(&pointer.oid) {
592 if !out[idx].paths.contains(&path) {
593 out[idx].paths.push(path);
594 }
595 continue;
596 }
597 by_oid.insert(pointer.oid, out.len());
598 out.push(PointerEntry {
599 oid: pointer.oid,
600 size: pointer.size,
601 path: Some(path.clone()),
602 paths: vec![path],
603 canonical: pointer.canonical,
604 extensions: pointer.extensions.clone(),
605 });
606 }
607 Ok(out)
608}
609
610pub fn scan_stashed(cwd: &Path) -> Result<Vec<PointerEntry>, Error> {
624 let stash_shas: Vec<String> = match Command::new("git")
625 .arg("-C")
626 .arg(cwd)
627 .args(["log", "-g", "--format=%h", "refs/stash", "--"])
628 .output()
629 {
630 Ok(out) if out.status.success() => String::from_utf8_lossy(&out.stdout)
631 .lines()
632 .map(|l| l.trim().to_owned())
633 .filter(|s| !s.is_empty())
634 .collect(),
635 _ => return Ok(Vec::new()),
636 };
637 if stash_shas.is_empty() {
638 return Ok(Vec::new());
639 }
640 let mut entries: Vec<PointerEntry> = Vec::new();
646 for extra in [&["-m", "--first-parent"][..], &[][..]] {
647 let mut args: Vec<String> = vec!["log".into()];
648 for a in extra {
649 args.push((*a).to_owned());
650 }
651 for a in [
652 "--no-ext-diff",
653 "--no-textconv",
654 "--color=never",
655 "-G",
656 "oid sha256:",
657 "-p",
658 "-U12",
659 "--format=lfs-commit-sha: %H %P",
660 ] {
661 args.push(a.to_owned());
662 }
663 for sha in &stash_shas {
664 args.push(format!("{sha}^..{sha}"));
665 }
666 let arg_refs: Vec<&str> = args.iter().map(String::as_str).collect();
667 let mut child = Command::new("git")
668 .arg("-C")
669 .arg(cwd)
670 .args(&arg_refs)
671 .stdout(Stdio::piped())
672 .stderr(Stdio::piped())
673 .spawn()?;
674 let stdout = child.stdout.take().expect("piped");
675 let mut parser = LogScanner::new(LogDiffDirection::Additions);
676 for line in BufReader::new(stdout).lines() {
677 let line = line?;
678 if let Some(entry) = parser.feed(&line) {
679 entries.push(entry);
680 }
681 }
682 if let Some(entry) = parser.flush() {
683 entries.push(entry);
684 }
685 let _ = child.wait();
689 }
690 Ok(entries)
691}
692
693pub fn scan_previous_versions(
705 cwd: &Path,
706 reference: &str,
707 since: SystemTime,
708) -> Result<Vec<PointerEntry>, Error> {
709 let since_unix = since
710 .duration_since(UNIX_EPOCH)
711 .map(|d| d.as_secs() as i64)
712 .unwrap_or(0);
713 let since_arg = format!("--since=@{since_unix}");
714 let mut child = Command::new("git")
715 .arg("-C")
716 .arg(cwd)
717 .args([
718 "log",
719 "--no-ext-diff",
720 "--no-textconv",
721 "--color=never",
722 "-G",
723 "oid sha256:",
724 "-p",
725 "-U12",
726 "--format=lfs-commit-sha: %H %P",
727 &since_arg,
728 reference,
729 ])
730 .stdout(Stdio::piped())
731 .stderr(Stdio::piped())
732 .spawn()?;
733 let stdout = child.stdout.take().expect("piped");
734 let mut parser = LogScanner::new(LogDiffDirection::Deletions);
735 let mut entries = Vec::new();
736 for line in BufReader::new(stdout).lines() {
737 let line = line?;
738 if let Some(entry) = parser.feed(&line) {
739 entries.push(entry);
740 }
741 }
742 if let Some(entry) = parser.flush() {
743 entries.push(entry);
744 }
745 let status = child.wait()?;
746 if !status.success() {
747 return Err(Error::Failed(format!(
748 "git log failed: exit {:?}",
749 status.code()
750 )));
751 }
752 Ok(entries)
753}
754
/// Which side of a patch a [`LogScanner`] collects pointer lines from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LogDiffDirection {
    /// Collect `+` lines (plus context lines) and report the `b/` path.
    Additions,
    /// Collect `-` lines (plus context lines) and report the `a/` path.
    Deletions,
}
765
/// Line-by-line parser for `git log -p` output that reassembles LFS
/// pointer text from the diff lines of one file at a time.
struct LogScanner {
    /// Side of the diff whose lines are collected.
    direction: LogDiffDirection,
    /// File name taken from the most recent `diff --git` / `diff --cc`
    /// header; `None` until a header was seen or when it failed to parse.
    current_filename: Option<String>,
    /// Pointer lines gathered for the current file, with the diff marker
    /// removed and each line terminated by `\n`.
    pointer_data: Vec<u8>,
}
779
780impl LogScanner {
781 fn new(direction: LogDiffDirection) -> Self {
782 Self {
783 direction,
784 current_filename: None,
785 pointer_data: Vec::new(),
786 }
787 }
788
789 fn feed(&mut self, line: &str) -> Option<PointerEntry> {
793 if line.starts_with("lfs-commit-sha: ") {
794 return self.flush();
795 }
796 if let Some(rest) = line.strip_prefix("diff --git ") {
797 let entry = self.flush();
798 self.current_filename = parse_diff_git_header(rest, self.direction);
799 return entry;
800 }
801 if let Some(rest) = line.strip_prefix("diff --cc ") {
802 let entry = self.flush();
803 self.current_filename = Some(rest.to_owned());
804 return entry;
805 }
806 if self.current_filename.is_some() && is_pointer_data_line(line, self.direction) {
807 self.pointer_data.extend_from_slice(&line.as_bytes()[1..]);
809 self.pointer_data.push(b'\n');
810 }
811 None
812 }
813
814 fn flush(&mut self) -> Option<PointerEntry> {
817 if self.pointer_data.is_empty() {
818 return None;
819 }
820 let parsed = Pointer::parse(&self.pointer_data);
821 let path = self.current_filename.as_ref().map(PathBuf::from);
822 self.pointer_data.clear();
823 let pointer = parsed.ok()?;
824 Some(PointerEntry {
825 oid: pointer.oid,
826 size: pointer.size,
827 paths: path.iter().cloned().collect(),
828 path,
829 canonical: pointer.canonical,
830 extensions: pointer.extensions,
831 })
832 }
833}
834
835fn is_pointer_data_line(line: &str, dir: LogDiffDirection) -> bool {
841 let mut chars = line.chars();
842 let Some(marker) = chars.next() else {
843 return false;
844 };
845 let dir_match = matches!(
846 (marker, dir),
847 ('+', LogDiffDirection::Additions) | ('-', LogDiffDirection::Deletions) | (' ', _)
848 );
849 if !dir_match {
850 return false;
851 }
852 let body = chars.as_str();
853 body.starts_with("version https://git-lfs")
854 || body.starts_with("oid sha256")
855 || body.starts_with("size")
856 || body.starts_with("ext-")
857}
858
859fn parse_diff_git_header(rest: &str, dir: LogDiffDirection) -> Option<String> {
869 let trimmed = rest.trim();
870 let a_idx = trimmed.find("a/")?;
871 let after_a = &trimmed[a_idx + 2..];
872 let space_idx = after_a.find(|c: char| c.is_whitespace())?;
875 let path_a = &after_a[..space_idx];
876 let after_space = after_a[space_idx..].trim_start();
877 let after_b = after_space.strip_prefix("b/")?;
878 match dir {
879 LogDiffDirection::Additions => Some(after_b.to_owned()),
880 LogDiffDirection::Deletions => Some(path_a.to_owned()),
881 }
882}
883
// Unit tests: the repository-backed tests build throwaway repos through
// `commit_helper`; the `log_scanner_*` tests feed canned `git log -p` lines.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::commit_helper::*;

    /// Build pointer text for `content`: a `version` line, the SHA-256 of
    /// `content` as lowercase hex on the `oid` line, and `content.len()`
    /// on the `size` line.
    fn pointer_text(content: &[u8]) -> Vec<u8> {
        use sha2::{Digest, Sha256};
        let oid_bytes: [u8; 32] = Sha256::digest(content).into();
        let oid_hex = oid_bytes.iter().fold(String::new(), |mut s, b| {
            use std::fmt::Write;
            let _ = write!(s, "{b:02x}");
            s
        });
        format!(
            "version https://git-lfs.github.com/spec/v1\noid sha256:{oid_hex}\nsize {}\n",
            content.len()
        )
        .into_bytes()
    }

    // A repository holding only a plain file yields no pointers.
    #[test]
    fn empty_repo_returns_no_pointers() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"plain content");
        let result = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        assert!(result.is_empty());
    }

    // Only the blob that parses as a pointer is reported, with its size and path.
    #[test]
    fn finds_pointer_blobs_skips_plain_blobs() {
        let repo = init_repo();
        commit_file(&repo, "plain.txt", b"just text");
        let pointer = pointer_text(b"this would be the actual binary content");
        commit_file(&repo, "big.bin", &pointer);

        let result = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        assert_eq!(result.len(), 1, "{result:?}");
        assert_eq!(
            result[0].size,
            b"this would be the actual binary content".len() as u64,
        );
        assert_eq!(result[0].path.as_deref(), Some(Path::new("big.bin")));
    }

    // The same pointer committed at two paths produces a single entry.
    #[test]
    fn dedups_same_lfs_oid_in_multiple_paths() {
        let repo = init_repo();
        let pointer = pointer_text(b"shared payload");
        commit_file(&repo, "first.bin", &pointer);
        commit_file(&repo, "second.bin", &pointer);

        let result = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        assert_eq!(result.len(), 1, "{result:?}");
    }

    // A pointer overwritten by a later commit is still found via history.
    #[test]
    fn finds_pointers_in_history_not_just_tip() {
        let repo = init_repo();
        let pointer = pointer_text(b"deleted later");
        commit_file(&repo, "x.bin", &pointer);
        commit_file(&repo, "x.bin", b"plain text now");

        let result = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].size, b"deleted later".len() as u64);
    }

    // Excluding the first commit leaves only the pointer added afterwards.
    #[test]
    fn excludes_filter_history_walk() {
        let repo = init_repo();
        commit_file(&repo, "old.bin", &pointer_text(b"old payload"));
        let first = head_oid(&repo);
        commit_file(&repo, "new.bin", &pointer_text(b"new payload"));

        let result = scan_pointers(repo.path(), &["HEAD"], &[&first]).unwrap();
        assert_eq!(result.len(), 1, "{result:?}");
        assert_eq!(result[0].size, b"new payload".len() as u64);
    }

    // Small blobs that merely start like a pointer but fail to parse are ignored.
    #[test]
    fn skips_blobs_that_look_like_pointers_but_dont_parse() {
        let repo = init_repo();
        commit_file(&repo, "fake.bin", b"version foo\nbut not really a pointer");

        let result = scan_pointers(repo.path(), &["HEAD"], &[]).unwrap();
        assert!(result.is_empty(), "{result:?}");
    }

    // `scan_tree` inspects only the tree at the ref, not earlier commits.
    #[test]
    fn scan_tree_returns_only_tree_entries_not_history() {
        let repo = init_repo();
        let pointer = pointer_text(b"deleted later");
        commit_file(&repo, "x.bin", &pointer);
        commit_file(&repo, "x.bin", b"plain text now");

        let result = scan_tree(repo.path(), "HEAD").unwrap();
        assert!(result.is_empty(), "{result:?}");
    }

    // Unlike `scan_pointers`, `scan_tree` keeps one entry per path.
    #[test]
    fn scan_tree_emits_one_entry_per_path_not_per_oid() {
        let repo = init_repo();
        let pointer = pointer_text(b"shared payload");
        commit_file(&repo, "first.bin", &pointer);
        commit_file(&repo, "second.bin", &pointer);

        let mut result = scan_tree(repo.path(), "HEAD").unwrap();
        result.sort_by(|a, b| a.path.cmp(&b.path));
        assert_eq!(result.len(), 2, "{result:?}");
        assert_eq!(result[0].path.as_deref(), Some(Path::new("first.bin")));
        assert_eq!(result[1].path.as_deref(), Some(Path::new("second.bin")));
        assert_eq!(result[0].oid, result[1].oid);
    }

    // Plain blobs in the tree are skipped; the pointer blob is kept.
    #[test]
    fn scan_tree_skips_plain_blobs_and_keeps_pointers() {
        let repo = init_repo();
        commit_file(&repo, "plain.txt", b"just text");
        let pointer = pointer_text(b"binary content");
        commit_file(&repo, "big.bin", &pointer);

        let result = scan_tree(repo.path(), "HEAD").unwrap();
        assert_eq!(result.len(), 1, "{result:?}");
        assert_eq!(result[0].path.as_deref(), Some(Path::new("big.bin")));
    }

    // An unknown ref surfaces git's error text through `Error::Failed`.
    #[test]
    fn scan_tree_unknown_ref_errors() {
        let repo = init_repo();
        commit_file(&repo, "a.txt", b"x");
        let err = scan_tree(repo.path(), "does-not-exist").unwrap_err();
        match err {
            Error::Failed(msg) => assert!(
                msg.contains("does-not-exist") || msg.contains("Not a valid"),
                "unexpected message: {msg}"
            ),
            _ => panic!("expected Failed, got {err:?}"),
        }
    }

    /// Run `lines` through a fresh `LogScanner` and collect every entry it
    /// emits, including the one produced by the final `flush`.
    fn feed_log<'a, I: IntoIterator<Item = &'a str>>(
        dir: LogDiffDirection,
        lines: I,
    ) -> Vec<PointerEntry> {
        let mut s = LogScanner::new(dir);
        let mut out = Vec::new();
        for line in lines {
            if let Some(e) = s.feed(line) {
                out.push(e);
            }
        }
        if let Some(e) = s.flush() {
            out.push(e);
        }
        out
    }

    // In deletion mode the context line plus the `-` lines form the pointer.
    #[test]
    fn log_scanner_extracts_deleted_pointer_body() {
        let lines = [
            "lfs-commit-sha: cccccccccccccccccccccccccccccccccccccccc bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
            "diff --git a/foo.bin b/foo.bin",
            "@@ -1,3 +1,3 @@",
            " version https://git-lfs.github.com/spec/v1",
            "-oid sha256:1111111111111111111111111111111111111111111111111111111111111111",
            "-size 100",
            "+oid sha256:2222222222222222222222222222222222222222222222222222222222222222",
            "+size 200",
        ];
        let out = feed_log(LogDiffDirection::Deletions, lines);
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].size, 100);
        assert_eq!(
            out[0]
                .path
                .as_deref()
                .map(|p| p.to_string_lossy().into_owned()),
            Some("foo.bin".to_owned())
        );
    }

    // Each `diff --git` header closes the previous file's pointer.
    #[test]
    fn log_scanner_handles_multi_file_commit() {
        let lines = [
            "lfs-commit-sha: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
            "diff --git a/a.bin b/a.bin",
            " version https://git-lfs.github.com/spec/v1",
            "-oid sha256:1111111111111111111111111111111111111111111111111111111111111111",
            "-size 1",
            "+oid sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
            "+size 2",
            "diff --git a/b.bin b/b.bin",
            " version https://git-lfs.github.com/spec/v1",
            "-oid sha256:3333333333333333333333333333333333333333333333333333333333333333",
            "-size 3",
            "+oid sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
            "+size 4",
        ];
        let out = feed_log(LogDiffDirection::Deletions, lines);
        assert_eq!(out.len(), 2);
        assert_eq!(out[0].size, 1);
        assert_eq!(out[1].size, 3);
    }

    // Diff lines without pointer keys never produce an entry.
    #[test]
    fn log_scanner_skips_non_pointer_diffs() {
        let lines = [
            "lfs-commit-sha: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
            "diff --git a/main.c b/main.c",
            "-int old() { return 1; }",
            "+int new() { return 2; }",
        ];
        let out = feed_log(LogDiffDirection::Deletions, lines);
        assert!(out.is_empty(), "got {out:?}");
    }

    // Additions pick the `b/` path, deletions the `a/` path.
    #[test]
    fn parse_diff_git_header_picks_correct_side() {
        let h = "a/foo.bin b/foo.bin";
        assert_eq!(
            parse_diff_git_header(h, LogDiffDirection::Additions).as_deref(),
            Some("foo.bin")
        );
        assert_eq!(
            parse_diff_git_header(h, LogDiffDirection::Deletions).as_deref(),
            Some("foo.bin")
        );
        let renamed = "a/old.bin b/new.bin";
        assert_eq!(
            parse_diff_git_header(renamed, LogDiffDirection::Additions).as_deref(),
            Some("new.bin")
        );
        assert_eq!(
            parse_diff_git_header(renamed, LogDiffDirection::Deletions).as_deref(),
            Some("old.bin")
        );
    }
}