1use std::fs::{self, File};
7use std::io::Read;
8use std::path::{Path, PathBuf};
9
10use ignore::WalkBuilder;
11use memmap2::Mmap;
12use rayon::prelude::*;
13
14use crate::support::{configure_walker, count_lines};
15use crate::Result;
16
/// Options controlling how [`collect_file_stats`] walks a directory tree.
#[derive(Debug, Clone)]
pub struct FileStatsOptions {
    /// Directory the walk starts from; reported paths are relative to it.
    pub root: PathBuf,
    /// Name of the entry that is excluded from the walk.
    pub void_dir_name: String,
    /// When `false`, the walker's hidden-entry filter is switched on.
    pub include_hidden: bool,
    /// Minimum file size, in bytes, at which a file is memory-mapped instead
    /// of being read into a buffer. `0` disables memory mapping.
    pub mmap_threshold: u64,
}
29
30impl Default for FileStatsOptions {
31 fn default() -> Self {
32 Self {
33 root: PathBuf::from("."),
34 void_dir_name: ".void".to_string(),
35 include_hidden: false,
36 mmap_threshold: 64 * 1024, }
38 }
39}
40
/// Statistics recorded for a single file found by [`collect_file_stats`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileStatEntry {
    /// Path relative to the walk root, with `\` replaced by `/`.
    pub path: String,
    /// Lower-cased file extension, or `"(no ext)"` when the path has no
    /// UTF-8 extension.
    pub extension: String,
    /// File size in bytes, as reported by the file's metadata.
    pub size: u64,
    /// Line count computed by `count_lines` over the file's contents.
    pub lines: u32,
}
53
/// A single file queued for processing: where to open it and the path that
/// will be reported for it.
struct PathJob {
    /// Path as yielded by the directory walker (root-prefixed, not
    /// necessarily absolute); used for all filesystem access.
    abs_path: PathBuf,
    /// `abs_path` with the walk root stripped and `\` replaced by `/`.
    rel_path: String,
}
61
62pub fn collect_file_stats(opts: FileStatsOptions) -> Result<Vec<FileStatEntry>> {
67 let root = &opts.root;
68 let void_dir_name = opts.void_dir_name.clone();
69
70 let mut builder = WalkBuilder::new(root);
72 let jobs: Vec<PathJob> = configure_walker(&mut builder)
73 .hidden(!opts.include_hidden)
74 .filter_entry(move |entry| {
75 let name = entry.file_name().to_string_lossy();
76 name != void_dir_name
77 && name != "node_modules"
78 && name != ".git"
79 && name != ".DS_Store"
80 && name != "target"
81 })
82 .build()
83 .flatten()
84 .filter(|entry| entry.file_type().is_some_and(|t| t.is_file()))
85 .filter_map(|entry| {
86 let abs_path = entry.path().to_path_buf();
87 let rel_path = abs_path
88 .strip_prefix(root)
89 .ok()?
90 .to_string_lossy()
91 .replace('\\', "/");
92 Some(PathJob { abs_path, rel_path })
93 })
94 .collect();
95
96 let mmap_threshold = opts.mmap_threshold;
98 let stats: Vec<FileStatEntry> = jobs
99 .into_par_iter()
100 .filter_map(|job| process_file(&job.abs_path, &job.rel_path, mmap_threshold))
101 .collect();
102
103 Ok(stats)
104}
105
106fn process_file(abs_path: &Path, rel_path: &str, mmap_threshold: u64) -> Option<FileStatEntry> {
108 let metadata = fs::metadata(abs_path).ok()?;
109 let size = metadata.len();
110
111 let content = load_file_content(abs_path, size, mmap_threshold)?;
113
114 let lines = count_lines(content.as_ref());
116
117 let extension = abs_path
119 .extension()
120 .and_then(|ext| ext.to_str())
121 .map(|ext| ext.to_lowercase())
122 .unwrap_or_else(|| "(no ext)".to_string());
123
124 Some(FileStatEntry {
125 path: rel_path.to_string(),
126 extension,
127 size,
128 lines,
129 })
130}
131
/// File contents held either as a memory map or as an owned byte buffer.
enum FileContentRef {
    /// Contents exposed through a memory map of the file.
    Mmap(Mmap),
    /// Contents read fully into an in-memory buffer.
    Bytes(Vec<u8>),
}
137
138impl AsRef<[u8]> for FileContentRef {
139 fn as_ref(&self) -> &[u8] {
140 match self {
141 FileContentRef::Mmap(mmap) => mmap.as_ref(),
142 FileContentRef::Bytes(bytes) => bytes.as_slice(),
143 }
144 }
145}
146
147fn load_file_content(path: &Path, size: u64, mmap_threshold: u64) -> Option<FileContentRef> {
149 let use_mmap = mmap_threshold > 0 && size >= mmap_threshold;
150
151 if use_mmap {
152 if let Ok(file) = File::open(path) {
153 if let Ok(mmap) = unsafe { Mmap::map(&file) } {
154 return Some(FileContentRef::Mmap(mmap));
155 }
156 }
157 }
158
159 let mut file = File::open(path).ok()?;
161 let mut bytes = Vec::with_capacity(size as usize);
162 file.read_to_end(&mut bytes).ok()?;
163 Some(FileContentRef::Bytes(bytes))
164}
165
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Options rooted at `dir`, excluding `.void`, with memory mapping
    /// disabled so every file takes the buffered-read path.
    fn options_for(dir: &TempDir, include_hidden: bool) -> FileStatsOptions {
        FileStatsOptions {
            root: dir.path().to_path_buf(),
            void_dir_name: ".void".to_string(),
            include_hidden,
            mmap_threshold: 0,
        }
    }

    /// Files at the root and in a nested directory are all reported, with
    /// forward-slash relative paths.
    #[test]
    fn collect_basic() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("file1.txt"), "hello\nworld").unwrap();
        fs::write(root.join("file2.rs"), "fn main() {}").unwrap();
        fs::create_dir(root.join("subdir")).unwrap();
        fs::write(root.join("subdir/nested.txt"), "line1\nline2\nline3").unwrap();

        let stats = collect_file_stats(options_for(&temp, false)).unwrap();
        assert_eq!(stats.len(), 3);

        for expected in ["file1.txt", "file2.rs", "subdir/nested.txt"] {
            assert!(stats.iter().any(|entry| entry.path == expected));
        }
    }

    /// Nothing inside the configured void directory is reported.
    #[test]
    fn collect_skips_void_dir() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("file.txt"), "hello").unwrap();
        fs::create_dir(root.join(".void")).unwrap();
        fs::write(root.join(".void/secret"), "should be skipped").unwrap();

        let stats = collect_file_stats(options_for(&temp, false)).unwrap();
        assert_eq!(stats.len(), 1);
        assert_eq!(stats[0].path, "file.txt");
    }

    /// Hidden files appear only when `include_hidden` is set.
    #[test]
    fn collect_respects_hidden_option() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("visible.txt"), "visible").unwrap();
        fs::write(root.join(".hidden"), "hidden").unwrap();

        let without_hidden = collect_file_stats(options_for(&temp, false)).unwrap();
        assert_eq!(without_hidden.len(), 1);
        assert_eq!(without_hidden[0].path, "visible.txt");

        let with_hidden = collect_file_stats(options_for(&temp, true)).unwrap();
        assert_eq!(with_hidden.len(), 2);
    }

    /// Extensions are lower-cased, and files without one get a placeholder.
    #[test]
    fn extract_extension() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();
        fs::write(root.join("file.TXT"), "text").unwrap();
        fs::write(root.join("noext"), "no extension").unwrap();

        let stats = collect_file_stats(options_for(&temp, false)).unwrap();

        let txt_file = stats.iter().find(|s| s.path == "file.TXT").unwrap();
        assert_eq!(txt_file.extension, "txt");

        let noext_file = stats.iter().find(|s| s.path == "noext").unwrap();
        assert_eq!(noext_file.extension, "(no ext)");
    }
}