1use std::collections::HashMap;
2use std::process::Command;
3
/// A file path paired with its size, in bytes and in megabytes.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path associated with the file.
    pub path: String,
    /// Size in bytes.
    pub size_bytes: u64,
    /// Size in megabytes (`size_bytes / 1024 / 1024`), computed by [`FileInfo::new`].
    pub size_mb: f64,
}

impl FileInfo {
    /// Creates a `FileInfo`, deriving `size_mb` from `size_bytes`.
    pub fn new(path: String, size_bytes: u64) -> Self {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

        Self {
            size_mb: size_bytes as f64 / BYTES_PER_MB,
            path,
            size_bytes,
        }
    }
}
21
22pub fn run(limit: usize, threshold: Option<f64>) {
23 println!("{}", format_scan_start_message());
24
25 let file_objects = match get_file_objects() {
27 Ok(objects) => objects,
28 Err(msg) => {
29 eprintln!("{}", format_error_message(msg));
30 return;
31 }
32 };
33
34 if file_objects.is_empty() {
35 println!("{}", format_no_files_message());
36 return;
37 }
38
39 let mut large_files = find_largest_files(file_objects, threshold);
41
42 large_files.sort_by(|a, b| b.size_bytes.cmp(&a.size_bytes));
44
45 large_files.truncate(limit);
47
48 if large_files.is_empty() {
49 println!("{}", format_no_large_files_message(threshold));
50 return;
51 }
52
53 println!("{}", format_results_header(large_files.len(), threshold));
54
55 for (i, file) in large_files.iter().enumerate() {
57 println!("{}", format_file_line(i + 1, file));
58 }
59
60 let total_size: u64 = large_files.iter().map(|f| f.size_bytes).sum();
62 let total_mb = total_size as f64 / (1024.0 * 1024.0);
63 println!("{}", format_summary_message(large_files.len(), total_mb));
64}
65
66fn get_file_objects() -> Result<Vec<(String, String, u64)>, &'static str> {
68 let output = Command::new("git")
69 .args(get_rev_list_args())
70 .output()
71 .map_err(|_| "Failed to execute git rev-list")?;
72
73 if !output.status.success() {
74 return Err("Failed to get file objects from git history");
75 }
76
77 let stdout = String::from_utf8_lossy(&output.stdout);
78 parse_git_objects(&stdout)
79}
80
/// Arguments for the `git rev-list` invocation that enumerates the ids of
/// every blob reachable from any ref, one id per line.
///
/// The previous `--filter=blob:none` removed *all blobs* from the listing,
/// leaving only commits and trees, which is the opposite of what a file-size
/// scan needs. `--filter=object:type=blob` keeps only blobs, and
/// `--filter-provided-objects` applies the filter to the ref tips as well
/// (they are otherwise always printed).
///
/// Both filter options require Git 2.32 or newer.
pub fn get_rev_list_args() -> [&'static str; 6] {
    [
        "rev-list",
        "--objects",
        "--all",
        // Print bare object ids without the trailing path.
        "--no-object-names",
        "--filter=object:type=blob",
        "--filter-provided-objects",
    ]
}
92
93fn parse_git_objects(output: &str) -> Result<Vec<(String, String, u64)>, &'static str> {
95 let mut objects = Vec::new();
96
97 for line in output.lines() {
98 let hash = line.trim();
99 if hash.is_empty() || hash.len() != 40 {
100 continue;
101 }
102
103 if let Ok(size) = get_object_size(hash) {
105 if size > 0 {
106 if let Ok(paths) = get_object_paths(hash) {
108 for path in paths {
109 objects.push((hash.to_string(), path, size));
110 }
111 }
112 }
113 }
114 }
115
116 Ok(objects)
117}
118
/// Asks `git cat-file -s` for the size of the object named by `hash`.
///
/// # Errors
///
/// * `"Failed to get object size"` - `git` could not be run or exited with a
///   non-success status.
/// * `"Invalid size format"` - the command's output did not parse as a `u64`.
fn get_object_size(hash: &str) -> Result<u64, &'static str> {
    let finished = match Command::new("git").args(["cat-file", "-s", hash]).output() {
        Ok(finished) if finished.status.success() => finished,
        // Spawn failure and unsuccessful exit share the same message.
        _ => return Err("Failed to get object size"),
    };

    String::from_utf8_lossy(&finished.stdout)
        .trim()
        .parse::<u64>()
        .map_err(|_| "Invalid size format")
}
133
134fn get_object_paths(hash: &str) -> Result<Vec<String>, &'static str> {
136 let output = Command::new("git")
137 .args([
138 "log",
139 "--all",
140 "--pretty=format:",
141 "--name-only",
142 "--diff-filter=A",
143 "-S",
144 hash,
145 ])
146 .output()
147 .map_err(|_| "Failed to get object paths")?;
148
149 if !output.status.success() {
150 return get_object_paths_fallback(hash);
152 }
153
154 let stdout = String::from_utf8_lossy(&output.stdout);
155 let paths: Vec<String> = stdout
156 .lines()
157 .filter(|line| !line.trim().is_empty())
158 .map(|line| line.trim().to_string())
159 .collect();
160
161 if paths.is_empty() {
162 get_object_paths_fallback(hash)
163 } else {
164 Ok(paths)
165 }
166}
167
/// Looks up the paths listed next to `hash` in the output of
/// `git rev-list --objects --all`.
///
/// If no path is listed for the object, a single placeholder of the form
/// `unknown-<first 8 characters of hash>` is returned (the whole `hash` when
/// it is shorter than that).
///
/// The command's exit status is not inspected; a run that produces no
/// matching output ends in the placeholder.
///
/// Note: the full object listing is produced on every call.
///
/// # Errors
///
/// Returns `"Failed to get object paths"` when `git` cannot be run.
fn get_object_paths_fallback(hash: &str) -> Result<Vec<String>, &'static str> {
    let output = Command::new("git")
        .args(["rev-list", "--objects", "--all"])
        .output()
        .map_err(|_| "Failed to get object paths")?;

    let stdout = String::from_utf8_lossy(&output.stdout);
    let paths: Vec<String> = stdout
        .lines()
        // Each line is "<id>" or "<id> <path>". Splitting at the first space
        // only keeps paths that contain tabs or repeated spaces intact.
        .filter_map(|line| line.split_once(' '))
        .filter(|(id, path)| *id == hash && !path.is_empty())
        .map(|(_, path)| path.to_string())
        .collect();

    if paths.is_empty() {
        // `get` instead of slicing: never panics on a short `hash`.
        let short = hash.get(..8).unwrap_or(hash);
        Ok(vec![format!("unknown-{short}")])
    } else {
        Ok(paths)
    }
}
194
195fn find_largest_files(
197 objects: Vec<(String, String, u64)>,
198 threshold: Option<f64>,
199) -> Vec<FileInfo> {
200 let mut file_sizes: HashMap<String, u64> = HashMap::new();
201
202 for (_hash, path, size) in objects {
204 file_sizes
205 .entry(path)
206 .and_modify(|current| *current = (*current).max(size))
207 .or_insert(size);
208 }
209
210 let threshold_bytes = threshold.map(|mb| (mb * 1024.0 * 1024.0) as u64);
211
212 file_sizes
213 .into_iter()
214 .filter(|(_, size)| threshold_bytes.is_none_or(|threshold| *size >= threshold))
215 .map(|(path, size)| FileInfo::new(path, size))
216 .collect()
217}
218
/// Message printed when the repository scan begins.
///
/// The leading emoji is written as a Unicode escape so the literal cannot be
/// damaged by a wrong source-file encoding again.
// NOTE(review): the original glyph was mis-encoded beyond recovery (only the
// lead byte of a 4-byte sequence survived); U+1F50D (magnifying glass) is
// assumed - confirm against the intended output.
pub fn format_scan_start_message() -> &'static str {
    "\u{1F50D} Scanning repository for large files..."
}
223
/// Formats an error message for display, prefixed with an error glyph.
///
/// The glyph is written as a Unicode escape so the literal cannot be damaged
/// by a wrong source-file encoding again.
// NOTE(review): the original glyph was mis-encoded (only the lead byte of a
// 3-byte sequence survived); U+274C (cross mark) is assumed - confirm.
pub fn format_error_message(msg: &str) -> String {
    format!("\u{274C} {msg}")
}
228
/// Message printed when the repository history contains no files.
///
/// The prefix is the information sign U+2139 followed by the emoji variation
/// selector U+FE0F, reconstructed from the mis-encoded bytes that were left
/// in the literal. Escapes are used so the text survives re-encoding.
pub fn format_no_files_message() -> &'static str {
    "\u{2139}\u{FE0F} No files found in repository history"
}
233
/// Message printed when no file qualifies for the listing.
///
/// With a `threshold` the message names it (in MB, one decimal place);
/// without one a generic message is returned.
///
/// The prefix is the check mark U+2705. Its mis-encoded bytes had split each
/// literal across two lines, so the messages contained a stray line break;
/// the literals are single-line again and use an escape for the glyph.
pub fn format_no_large_files_message(threshold: Option<f64>) -> String {
    match threshold {
        Some(mb) => format!("\u{2705} No files found larger than {mb:.1} MB"),
        None => "\u{2705} No large files found".to_string(),
    }
}
241
/// Header line printed above the list of results.
///
/// `count` is the number of files listed; when `threshold` is given it is
/// included in MB with one decimal place.
///
/// The glyph is written as a Unicode escape so the literal cannot be damaged
/// by a wrong source-file encoding again.
// NOTE(review): the original glyph was mis-encoded beyond recovery;
// U+1F4CA (bar chart) is assumed - confirm against the intended output.
pub fn format_results_header(count: usize, threshold: Option<f64>) -> String {
    match threshold {
        Some(mb) => format!("\u{1F4CA} Top {count} files larger than {mb:.1} MB:"),
        None => format!("\u{1F4CA} Top {count} largest files:"),
    }
}
249
250pub fn format_file_line(index: usize, file: &FileInfo) -> String {
252 format!(
253 "{index:2}. {size:>8.1} MB {path}",
254 size = file.size_mb,
255 path = file.path
256 )
257}
258
/// Summary line printed after the results: a blank line, then the number of
/// files and their combined size in MB (one decimal place).
///
/// The glyph is written as a Unicode escape so the literal cannot be damaged
/// by a wrong source-file encoding again.
// NOTE(review): the original glyph was mis-encoded beyond recovery;
// U+1F4C8 (chart increasing) is assumed - confirm against the intended output.
pub fn format_summary_message(count: usize, total_mb: f64) -> String {
    format!("\n\u{1F4C8} Total: {count} files, {total_mb:.1} MB")
}
263
/// Renders a byte count using the largest fitting unit among B, KB, MB, GB
/// and TB (each step is a factor of 1024).
///
/// Plain bytes are printed without decimals; every larger unit is printed
/// with one decimal place. Values beyond the TB range stay in TB.
pub fn format_size_human_readable(bytes: u64) -> String {
    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;

    let mut value = bytes as f64;
    let mut unit = 0usize;
    loop {
        let at_largest_unit = unit + 1 == UNITS.len();
        if value < STEP || at_largest_unit {
            break;
        }
        value /= STEP;
        unit += 1;
    }

    let label = UNITS[unit];
    match unit {
        0 => format!("{value:.0} {label}"),
        _ => format!("{value:.1} {label}"),
    }
}