git_x/
large_files.rs

1use std::collections::HashMap;
2use std::process::Command;
3
/// A file path paired with its size, in bytes and in megabytes.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path identifying the file.
    pub path: String,
    /// Size in bytes.
    pub size_bytes: u64,
    /// Size in MB (`size_bytes / 1024 / 1024`), filled in by [`FileInfo::new`].
    pub size_mb: f64,
}

impl FileInfo {
    /// Creates a `FileInfo`, deriving `size_mb` from `size_bytes`.
    pub fn new(path: String, size_bytes: u64) -> Self {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

        FileInfo {
            path,
            size_bytes,
            size_mb: size_bytes as f64 / BYTES_PER_MB,
        }
    }
}
21
22pub fn run(limit: usize, threshold: Option<f64>) {
23    println!("{}", format_scan_start_message());
24
25    // Get all file objects and their sizes
26    let file_objects = match get_file_objects() {
27        Ok(objects) => objects,
28        Err(msg) => {
29            eprintln!("{}", format_error_message(msg));
30            return;
31        }
32    };
33
34    if file_objects.is_empty() {
35        println!("{}", format_no_files_message());
36        return;
37    }
38
39    // Find the largest files by path
40    let mut large_files = find_largest_files(file_objects, threshold);
41
42    // Sort by size (largest first)
43    large_files.sort_by(|a, b| b.size_bytes.cmp(&a.size_bytes));
44
45    // Limit results
46    large_files.truncate(limit);
47
48    if large_files.is_empty() {
49        println!("{}", format_no_large_files_message(threshold));
50        return;
51    }
52
53    println!("{}", format_results_header(large_files.len(), threshold));
54
55    // Print results
56    for (i, file) in large_files.iter().enumerate() {
57        println!("{}", format_file_line(i + 1, file));
58    }
59
60    // Show summary
61    let total_size: u64 = large_files.iter().map(|f| f.size_bytes).sum();
62    let total_mb = total_size as f64 / (1024.0 * 1024.0);
63    println!("{}", format_summary_message(large_files.len(), total_mb));
64}
65
66// Helper function to get file objects from git
67fn get_file_objects() -> Result<Vec<(String, String, u64)>, &'static str> {
68    let output = Command::new("git")
69        .args(get_rev_list_args())
70        .output()
71        .map_err(|_| "Failed to execute git rev-list")?;
72
73    if !output.status.success() {
74        return Err("Failed to get file objects from git history");
75    }
76
77    let stdout = String::from_utf8_lossy(&output.stdout);
78    parse_git_objects(&stdout)
79}
80
/// Returns the argument list passed to `git` to enumerate object ids.
///
/// NOTE(review): the git-rev-list documentation describes
/// `--filter=blob:none` as omitting all blobs from the listing. The callers
/// treat the listed ids as file objects, so confirm this filter is intended.
pub fn get_rev_list_args() -> [&'static str; 6] {
    const REV_LIST_ARGS: [&str; 6] = [
        "rev-list",
        "--objects",
        "--all",
        "--no-object-names",
        "--filter=blob:none",
        "--",
    ];

    REV_LIST_ARGS
}
92
93// Helper function to parse git objects output
94fn parse_git_objects(output: &str) -> Result<Vec<(String, String, u64)>, &'static str> {
95    let mut objects = Vec::new();
96
97    for line in output.lines() {
98        let hash = line.trim();
99        if hash.is_empty() || hash.len() != 40 {
100            continue;
101        }
102
103        // Get object size
104        if let Ok(size) = get_object_size(hash) {
105            if size > 0 {
106                // Get file paths for this object
107                if let Ok(paths) = get_object_paths(hash) {
108                    for path in paths {
109                        objects.push((hash.to_string(), path, size));
110                    }
111                }
112            }
113        }
114    }
115
116    Ok(objects)
117}
118
/// Asks `git cat-file -s <hash>` for the size of an object.
///
/// Returns an error message when the command cannot be run, exits with a
/// non-zero status, or prints something that does not parse as a `u64`.
fn get_object_size(hash: &str) -> Result<u64, &'static str> {
    const SIZE_QUERY_FAILED: &str = "Failed to get object size";

    let cat_file = Command::new("git")
        .args(["cat-file", "-s", hash])
        .output()
        .map_err(|_| SIZE_QUERY_FAILED)?;

    if cat_file.status.success() {
        // Trim before parsing so surrounding whitespace in the output is ignored.
        String::from_utf8_lossy(&cat_file.stdout)
            .trim()
            .parse::<u64>()
            .map_err(|_| "Invalid size format")
    } else {
        Err(SIZE_QUERY_FAILED)
    }
}
133
134// Helper function to get object paths
135fn get_object_paths(hash: &str) -> Result<Vec<String>, &'static str> {
136    let output = Command::new("git")
137        .args([
138            "log",
139            "--all",
140            "--pretty=format:",
141            "--name-only",
142            "--diff-filter=A",
143            "-S",
144            hash,
145        ])
146        .output()
147        .map_err(|_| "Failed to get object paths")?;
148
149    if !output.status.success() {
150        // Fallback: try to find the path using rev-list with object names
151        return get_object_paths_fallback(hash);
152    }
153
154    let stdout = String::from_utf8_lossy(&output.stdout);
155    let paths: Vec<String> = stdout
156        .lines()
157        .filter(|line| !line.trim().is_empty())
158        .map(|line| line.trim().to_string())
159        .collect();
160
161    if paths.is_empty() {
162        get_object_paths_fallback(hash)
163    } else {
164        Ok(paths)
165    }
166}
167
/// Fallback path lookup: scans `git rev-list --objects --all` for `hash`.
///
/// Every output line of the form `<hash> <path>` whose id equals `hash` and
/// whose path is non-empty contributes that path. When nothing matches, a
/// single placeholder `unknown-<first 8 characters of hash>` is returned, so
/// the result is never empty.
///
/// Returns an error message only when `git` cannot be started; the exit
/// status of the command is not inspected.
fn get_object_paths_fallback(hash: &str) -> Result<Vec<String>, &'static str> {
    let output = Command::new("git")
        .args(["rev-list", "--objects", "--all"])
        .output()
        .map_err(|_| "Failed to get object paths")?;

    let stdout = String::from_utf8_lossy(&output.stdout);
    let paths: Vec<String> = stdout
        .lines()
        .filter_map(|line| {
            // Split at the first space only, so the path keeps its own
            // whitespace (tabs, repeated spaces) exactly as printed.
            let (object_id, path) = line.split_once(' ')?;
            (object_id == hash && !path.is_empty()).then(|| path.to_string())
        })
        .collect();

    if paths.is_empty() {
        // Checked slicing: a hash shorter than 8 bytes is used whole instead
        // of panicking.
        let short = hash.get(..8).unwrap_or(hash);
        Ok(vec![format!("unknown-{short}")])
    } else {
        Ok(paths)
    }
}
194
195// Helper function to find largest files
196fn find_largest_files(
197    objects: Vec<(String, String, u64)>,
198    threshold: Option<f64>,
199) -> Vec<FileInfo> {
200    let mut file_sizes: HashMap<String, u64> = HashMap::new();
201
202    // Group by file path and take the maximum size
203    for (_hash, path, size) in objects {
204        file_sizes
205            .entry(path)
206            .and_modify(|current| *current = (*current).max(size))
207            .or_insert(size);
208    }
209
210    let threshold_bytes = threshold.map(|mb| (mb * 1024.0 * 1024.0) as u64);
211
212    file_sizes
213        .into_iter()
214        .filter(|(_, size)| threshold_bytes.is_none_or(|threshold| *size >= threshold))
215        .map(|(path, size)| FileInfo::new(path, size))
216        .collect()
217}
218
/// Returns the banner printed before the repository scan starts.
///
/// The leading emoji is the magnifying glass (U+1F50D).
pub fn format_scan_start_message() -> &'static str {
    "🔍 Scanning repository for large files..."
}
223
/// Prefixes `msg` with the cross-mark emoji (U+274C) for error output.
pub fn format_error_message(msg: &str) -> String {
    format!("❌ {msg}")
}
228
/// Returns the notice shown when no file objects were found at all.
pub fn format_no_files_message() -> &'static str {
    // Information sign followed by the emoji variation selector, written as
    // escapes because the selector (U+FE0F) is invisible in most editors.
    "\u{2139}\u{FE0F} No files found in repository history"
}
233
/// Returns the message shown when no file made it into the report.
///
/// With a `threshold` the message names it in MB with one decimal place;
/// without one a generic message is returned. Both start with the
/// check-mark emoji (U+2705).
pub fn format_no_large_files_message(threshold: Option<f64>) -> String {
    match threshold {
        Some(mb) => format!("✅ No files found larger than {mb:.1} MB"),
        None => "✅ No large files found".to_string(),
    }
}
241
/// Returns the heading printed above the list of files.
///
/// `count` is the number of files listed. With a `threshold` the heading
/// names it in MB with one decimal place. Both variants start with the
/// bar-chart emoji (U+1F4CA).
pub fn format_results_header(count: usize, threshold: Option<f64>) -> String {
    match threshold {
        Some(mb) => format!("📊 Top {count} files larger than {mb:.1} MB:"),
        None => format!("📊 Top {count} largest files:"),
    }
}
249
250// Helper function to format file line
251pub fn format_file_line(index: usize, file: &FileInfo) -> String {
252    format!(
253        "{index:2}. {size:>8.1} MB  {path}",
254        size = file.size_mb,
255        path = file.path
256    )
257}
258
/// Returns the closing summary line, preceded by a blank line.
///
/// `count` is the number of files and `total_mb` their combined size in MB,
/// shown with one decimal place. The line starts with the rising-chart
/// emoji (U+1F4C8).
pub fn format_summary_message(count: usize, total_mb: f64) -> String {
    format!("\n📈 Total: {count} files, {total_mb:.1} MB")
}
263
/// Renders a byte count with the largest fitting unit out of B, KB, MB, GB, TB.
///
/// Each unit step divides by 1024. Plain bytes are printed without decimals;
/// every larger unit is printed with one decimal place. Values beyond the TB
/// range stay in TB.
pub fn format_size_human_readable(bytes: u64) -> String {
    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;

    let mut scaled = bytes as f64;
    let mut unit = UNITS[0];

    // Move up one unit at a time until the value fits or the units run out.
    for &next_unit in &UNITS[1..] {
        if scaled < STEP {
            break;
        }
        scaled /= STEP;
        unit = next_unit;
    }

    match unit {
        "B" => format!("{scaled:.0} {unit}"),
        _ => format!("{scaled:.1} {unit}"),
    }
}