1use crate::core::output::BufferedOutput;
2use crate::{GitXError, Result};
3use std::collections::HashMap;
4use std::process::Command;
5
/// A file path together with its size, stored both in bytes and in megabytes.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path of the file.
    pub path: String,
    /// Size in bytes.
    pub size_bytes: u64,
    /// Size in megabytes, where 1 MB is 1024 * 1024 bytes.
    pub size_mb: f64,
}

impl FileInfo {
    /// Creates a `FileInfo`, deriving `size_mb` from `size_bytes`.
    pub fn new(path: String, size_bytes: u64) -> Self {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

        Self {
            size_mb: size_bytes as f64 / BYTES_PER_MB,
            path,
            size_bytes,
        }
    }
}
23
24pub fn run(limit: usize, threshold: Option<f64>) -> Result<()> {
25 let mut output = BufferedOutput::new();
26
27 output.add_line("š Scanning repository for large files...".to_string());
28
29 let file_objects = get_file_objects().map_err(|e| GitXError::GitCommand(e.to_string()))?;
31
32 if file_objects.is_empty() {
33 output.add_line("ā¹ļø No files found in repository history".to_string());
34 output.flush();
35 return Ok(());
36 }
37
38 let mut large_files = find_largest_files(file_objects, threshold);
40
41 large_files.sort_by(|a, b| b.size_bytes.cmp(&a.size_bytes));
43
44 large_files.truncate(limit);
46
47 if large_files.is_empty() {
48 output.add_line(match threshold {
49 Some(mb) => format!("ā
No files found larger than {mb:.1} MB"),
50 None => "ā
No large files found".to_string(),
51 });
52 output.flush();
53 return Ok(());
54 }
55
56 let count = large_files.len();
57 output.add_line(match threshold {
58 Some(mb) => format!("š Top {count} files larger than {mb:.1} MB:"),
59 None => format!("š Top {count} largest files:"),
60 });
61
62 for (i, file) in large_files.iter().enumerate() {
64 output.add_line(format_file_line(i + 1, file));
65 }
66
67 let total_size: u64 = large_files.iter().map(|f| f.size_bytes).sum();
69 let total_mb = total_size as f64 / (1024.0 * 1024.0);
70 output.add_line(format_summary_message(large_files.len(), total_mb));
71
72 output.flush();
74 Ok(())
75}
76
77fn get_file_objects() -> Result<Vec<(String, String, u64)>> {
78 let output = Command::new("git")
79 .args(get_rev_list_args())
80 .output()
81 .map_err(GitXError::Io)?;
82
83 if !output.status.success() {
84 return Err(GitXError::GitCommand(
85 "Failed to get file objects from git history".to_string(),
86 ));
87 }
88
89 let stdout = String::from_utf8_lossy(&output.stdout);
90 parse_git_objects(&stdout)
91}
92
/// Returns the argument list passed to `git` to enumerate object ids.
///
/// NOTE(review): git documents `--filter=blob:none` as omitting all blobs from
/// the listing, which looks at odds with a scan for large files. Confirm that
/// this filter is intended.
fn get_rev_list_args() -> [&'static str; 6] {
    const REV_LIST_ARGS: [&str; 6] = [
        "rev-list",
        "--objects",
        "--all",
        "--no-object-names",
        "--filter=blob:none",
        "--",
    ];

    REV_LIST_ARGS
}
103
104fn parse_git_objects(output: &str) -> Result<Vec<(String, String, u64)>> {
105 let mut objects = Vec::new();
106
107 for line in output.lines() {
108 let hash = line.trim();
109 if hash.is_empty() || hash.len() != 40 {
110 continue;
111 }
112
113 if let Ok(size) = get_object_size(hash) {
115 if size > 0 {
116 if let Ok(paths) = get_object_paths(hash) {
118 for path in paths {
119 objects.push((hash.to_string(), path, size));
120 }
121 }
122 }
123 }
124 }
125
126 Ok(objects)
127}
128
129fn get_object_size(hash: &str) -> Result<u64> {
130 let output = Command::new("git")
131 .args(["cat-file", "-s", hash])
132 .output()
133 .map_err(GitXError::Io)?;
134
135 if !output.status.success() {
136 return Err(GitXError::GitCommand(
137 "Failed to get object size".to_string(),
138 ));
139 }
140
141 let size_str = String::from_utf8_lossy(&output.stdout);
142 size_str
143 .trim()
144 .parse()
145 .map_err(|_| GitXError::Parse("Invalid size format".to_string()))
146}
147
148fn get_object_paths(hash: &str) -> Result<Vec<String>> {
149 let output = Command::new("git")
150 .args([
151 "log",
152 "--all",
153 "--pretty=format:",
154 "--name-only",
155 "--diff-filter=A",
156 "-S",
157 hash,
158 ])
159 .output()
160 .map_err(GitXError::Io)?;
161
162 if !output.status.success() {
163 return get_object_paths_fallback(hash);
165 }
166
167 let stdout = String::from_utf8_lossy(&output.stdout);
168 let paths: Vec<String> = stdout
169 .lines()
170 .filter(|line| !line.trim().is_empty())
171 .map(|line| line.trim().to_string())
172 .collect();
173
174 if paths.is_empty() {
175 get_object_paths_fallback(hash)
176 } else {
177 Ok(paths)
178 }
179}
180
181fn get_object_paths_fallback(hash: &str) -> Result<Vec<String>> {
183 let output = Command::new("git")
184 .args(["rev-list", "--objects", "--all"])
185 .output()
186 .map_err(GitXError::Io)?;
187
188 let stdout = String::from_utf8_lossy(&output.stdout);
189 let paths: Vec<String> = stdout
190 .lines()
191 .filter_map(|line| {
192 let parts: Vec<&str> = line.split_whitespace().collect();
193 if parts.len() >= 2 && parts[0] == hash {
194 Some(parts[1..].join(" "))
195 } else {
196 None
197 }
198 })
199 .collect();
200
201 if paths.is_empty() {
202 Ok(vec![format!("unknown-{}", &hash[0..8])])
203 } else {
204 Ok(paths)
205 }
206}
207
208fn find_largest_files(
209 objects: Vec<(String, String, u64)>,
210 threshold: Option<f64>,
211) -> Vec<FileInfo> {
212 let mut file_sizes: HashMap<String, u64> = HashMap::new();
213
214 for (_hash, path, size) in objects {
216 file_sizes
217 .entry(path)
218 .and_modify(|current| *current = (*current).max(size))
219 .or_insert(size);
220 }
221
222 let threshold_bytes = threshold.map(|mb| (mb * 1024.0 * 1024.0) as u64);
223
224 file_sizes
225 .into_iter()
226 .filter(|(_, size)| threshold_bytes.is_none_or(|threshold| *size >= threshold))
227 .map(|(path, size)| FileInfo::new(path, size))
228 .collect()
229}
230
231pub fn format_file_line(index: usize, file: &FileInfo) -> String {
232 format!(
233 "{index:2}. {size:>8.1} MB {path}",
234 size = file.size_mb,
235 path = file.path
236 )
237}
238
/// Formats the summary line giving the number of files and their combined size.
///
/// The returned string begins with a newline character, followed by an emoji
/// marker, the file count and `total_mb` rendered with one decimal place.
fn format_summary_message(count: usize, total_mb: f64) -> String {
    format!("\n📈 Total: {count} files, {total_mb:.1} MB")
}
242
/// Renders a byte count using the largest fitting unit among B, KB, MB, GB, TB.
///
/// Units step by a factor of 1024. Plain bytes are printed without decimals;
/// every larger unit is printed with one decimal place. Values beyond the TB
/// range stay in TB.
pub fn format_size_human_readable(bytes: u64) -> String {
    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];

    let mut value = bytes as f64;
    let mut unit = UNITS[0];

    // Step up one unit at a time until the value fits or the units run out.
    for &larger in &UNITS[1..] {
        if value < 1024.0 {
            break;
        }
        value /= 1024.0;
        unit = larger;
    }

    if unit == UNITS[0] {
        format!("{value:.0} {unit}")
    } else {
        format!("{value:.1} {unit}")
    }
}