1use std::path::{Component, Path, PathBuf};
19use std::process::{Command, Stdio};
20
21use anyhow::{Context, Result};
22use ignore::WalkBuilder;
23use tokmd_io_port::MemFs;
24
/// Paths that may carry licensing information, grouped by kind.
///
/// Produced by [`license_candidates`]; both lists are returned sorted.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct LicenseCandidates {
    /// Files whose lowercased name starts with `license`, `copying` or `notice`.
    pub license_files: Vec<PathBuf>,
    /// Package manifests (`Cargo.toml`, `package.json`, `pyproject.toml`),
    /// matched on the lowercased file name.
    pub metadata_files: Vec<PathBuf>,
}
30
31pub fn list_files(root: &Path, max_files: Option<usize>) -> Result<Vec<PathBuf>> {
32 if max_files == Some(0) {
34 return Ok(Vec::new());
35 }
36
37 if let Some(mut files) = git_ls_files(root)? {
38 if let Some(limit) = max_files
39 && files.len() > limit
40 {
41 files.truncate(limit);
42 }
43 return Ok(files);
44 }
45
46 let mut files: Vec<PathBuf> = Vec::new();
47 let mut builder = WalkBuilder::new(root);
48 builder.hidden(false);
49 builder.git_ignore(true);
50 builder.git_exclude(true);
51 builder.git_global(true);
52 builder.follow_links(false);
53
54 for entry in builder.build() {
55 let entry = entry?;
56 if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
57 continue;
58 }
59 let path = entry.path().to_path_buf();
60 let rel = path.strip_prefix(root).unwrap_or(&path).to_path_buf();
61 files.push(rel);
62 if let Some(limit) = max_files
63 && files.len() >= limit
64 {
65 break;
66 }
67 }
68
69 files.sort();
70 Ok(files)
71}
72
73pub fn list_files_from_memfs(
77 fs: &MemFs,
78 root: &Path,
79 max_files: Option<usize>,
80) -> Result<Vec<PathBuf>> {
81 if max_files == Some(0) {
82 return Ok(Vec::new());
83 }
84
85 let normalized_root = normalize_memfs_path(root);
86 let mut files: Vec<PathBuf> = fs
87 .file_paths()
88 .filter_map(|path| memfs_relative_path(path, &normalized_root))
89 .collect();
90
91 files.sort();
92
93 if let Some(limit) = max_files
94 && files.len() > limit
95 {
96 files.truncate(limit);
97 }
98
99 Ok(files)
100}
101
102pub fn license_candidates(files: &[PathBuf]) -> LicenseCandidates {
103 let mut license_files = Vec::new();
104 let mut metadata_files = Vec::new();
105
106 for rel in files {
107 let name = rel
108 .file_name()
109 .and_then(|n| n.to_str())
110 .unwrap_or("")
111 .to_lowercase();
112 if name == "cargo.toml" || name == "package.json" || name == "pyproject.toml" {
113 metadata_files.push(rel.clone());
114 continue;
115 }
116 if name.starts_with("license") || name.starts_with("copying") || name.starts_with("notice")
117 {
118 license_files.push(rel.clone());
119 }
120 }
121
122 license_files.sort();
123 metadata_files.sort();
124
125 LicenseCandidates {
126 license_files,
127 metadata_files,
128 }
129}
130
131fn git_ls_files(root: &Path) -> Result<Option<Vec<PathBuf>>> {
132 let output = Command::new("git")
133 .arg("-C")
134 .arg(root)
135 .arg("ls-files")
136 .arg("-z")
137 .stdout(Stdio::piped())
138 .stderr(Stdio::null())
139 .output();
140
141 let output = match output {
142 Ok(out) => out,
143 Err(_) => return Ok(None),
144 };
145 if !output.status.success() {
146 return Ok(None);
147 }
148
149 let mut files = Vec::new();
150 let bytes = output.stdout;
151 for part in bytes.split(|b| *b == 0) {
152 if part.is_empty() {
153 continue;
154 }
155 let s = String::from_utf8_lossy(part).to_string();
156 files.push(PathBuf::from(s));
157 }
158
159 if files.is_empty() {
160 return Ok(None);
161 }
162
163 Ok(Some(files))
164}
165
166pub fn file_size(root: &Path, relative: &Path) -> Result<u64> {
167 let path = root.join(relative);
168 let meta =
169 std::fs::metadata(&path).with_context(|| format!("Failed to stat {}", path.display()))?;
170 Ok(meta.len())
171}
172
173pub fn file_size_from_memfs(fs: &MemFs, root: &Path, relative: &Path) -> Result<u64> {
175 let normalized_root = normalize_memfs_path(root);
176 let path = if normalized_root.as_os_str().is_empty() {
177 normalize_memfs_path(relative)
178 } else {
179 normalize_memfs_path(&normalized_root.join(relative))
180 };
181 fs.file_size(&path)
182 .with_context(|| format!("Failed to stat {}", path.display()))
183}
184
/// Rebuilds `path` without `.` and root-directory components.
///
/// Normal components, `..` components and any prefix component are pushed
/// onto the result unchanged; `..` is not resolved against its parent.
fn normalize_memfs_path(path: &Path) -> PathBuf {
    path.components()
        .filter_map(|component| match component {
            Component::CurDir | Component::RootDir => None,
            Component::Normal(part) => Some(part),
            Component::ParentDir | Component::Prefix(_) => Some(component.as_os_str()),
        })
        .collect()
}
197
/// Expresses `path` relative to `root`.
///
/// An empty `root` returns `path` unchanged. Otherwise the result is `None`
/// when `path` does not start with `root`.
fn memfs_relative_path(path: &Path, root: &Path) -> Option<PathBuf> {
    let relative = if root.as_os_str().is_empty() {
        path
    } else {
        path.strip_prefix(root).ok()?
    };
    Some(relative.to_path_buf())
}
204
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Turns a list of string literals into owned paths.
    fn paths(names: &[&str]) -> Vec<PathBuf> {
        names.iter().map(|name| PathBuf::from(*name)).collect()
    }

    // ---- license_candidates ----

    #[test]
    fn test_license_candidates_detects_license_files() {
        let input = paths(&[
            "LICENSE",
            "LICENSE.md",
            "LICENSE-MIT",
            "COPYING",
            "NOTICE",
            "src/main.rs",
        ]);
        let found = license_candidates(&input);
        // Everything except the source file is a license candidate.
        assert_eq!(found.license_files.len(), 5);
        assert!(found.metadata_files.is_empty());
    }

    #[test]
    fn test_license_candidates_detects_metadata_files() {
        let input = paths(&["Cargo.toml", "package.json", "pyproject.toml", "src/lib.rs"]);
        let found = license_candidates(&input);
        assert!(found.license_files.is_empty());
        assert_eq!(found.metadata_files.len(), 3);
    }

    #[test]
    fn test_license_candidates_mixed() {
        let input = paths(&["LICENSE", "Cargo.toml", "src/main.rs"]);
        let found = license_candidates(&input);
        assert_eq!(found.license_files.len(), 1);
        assert_eq!(found.metadata_files.len(), 1);
    }

    #[test]
    fn test_license_candidates_empty_input() {
        let found = license_candidates(&[]);
        assert!(found.license_files.is_empty());
        assert!(found.metadata_files.is_empty());
    }

    #[test]
    fn test_license_candidates_case_insensitive() {
        let input = paths(&["license", "License.txt"]);
        let found = license_candidates(&input);
        assert_eq!(found.license_files.len(), 2);
    }

    #[test]
    fn test_license_candidates_sorted_output() {
        // Input is deliberately given in reverse order.
        let input = paths(&["z/Cargo.toml", "a/Cargo.toml", "z/LICENSE", "a/LICENSE"]);
        let found = license_candidates(&input);
        assert_eq!(found.license_files[0], PathBuf::from("a/LICENSE"));
        assert_eq!(found.license_files[1], PathBuf::from("z/LICENSE"));
        assert_eq!(found.metadata_files[0], PathBuf::from("a/Cargo.toml"));
        assert_eq!(found.metadata_files[1], PathBuf::from("z/Cargo.toml"));
    }

    // ---- file_size ----

    #[test]
    fn test_file_size_returns_correct_bytes() {
        let tmp = tempfile::tempdir().unwrap();
        let content = "hello world";
        fs::write(tmp.path().join("test.txt"), content).unwrap();
        let reported = file_size(tmp.path(), Path::new("test.txt")).unwrap();
        assert_eq!(reported, content.len() as u64);
    }

    #[test]
    fn test_file_size_missing_file_errors() {
        let tmp = tempfile::tempdir().unwrap();
        let outcome = file_size(tmp.path(), Path::new("nonexistent.txt"));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_file_size_empty_file() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(tmp.path().join("empty.txt"), "").unwrap();
        let reported = file_size(tmp.path(), Path::new("empty.txt")).unwrap();
        assert_eq!(reported, 0);
    }

    // ---- list_files ----

    #[test]
    fn test_list_files_max_zero_returns_empty() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(tmp.path().join("a.rs"), "content").unwrap();
        let listed = list_files(tmp.path(), Some(0)).unwrap();
        assert!(listed.is_empty());
    }

    #[test]
    fn test_list_files_respects_max_limit() {
        let tmp = tempfile::tempdir().unwrap();
        fs::create_dir_all(tmp.path().join(".git")).unwrap();
        for index in 0..10 {
            fs::write(tmp.path().join(format!("file{index}.txt")), "x").unwrap();
        }
        let listed = list_files(tmp.path(), Some(3)).unwrap();
        assert!(listed.len() <= 3);
    }

    #[test]
    fn test_list_files_deterministic_sort() {
        let tmp = tempfile::tempdir().unwrap();
        fs::create_dir_all(tmp.path().join(".git")).unwrap();
        fs::create_dir_all(tmp.path().join("foo")).unwrap();
        fs::write(tmp.path().join("foo/bar"), "content").unwrap();
        fs::write(tmp.path().join("foo/bar.rs"), "content").unwrap();
        fs::write(tmp.path().join("foo.rs"), "content").unwrap();

        let listed = list_files(tmp.path(), None).unwrap();

        // Paths compare component by component, so everything inside `foo/`
        // sorts ahead of the sibling file `foo.rs`.
        let expected = paths(&["foo/bar", "foo/bar.rs", "foo.rs"]);
        let actual: Vec<PathBuf> = listed
            .into_iter()
            .filter(|candidate| candidate.to_string_lossy().starts_with("foo"))
            .collect();
        assert_eq!(actual, expected);
    }
}
361
// Compiled only when rustdoc collects doctests: the README is attached as
// this module's documentation so its code examples are run as doctests.
#[cfg(doctest)]
#[doc = include_str!("../README.md")]
pub mod readme_doctests {}