Skip to main content

diskforge_core/
file_finder.rs

1use std::cmp::Reverse;
2use std::collections::BinaryHeap;
3use std::ffi::OsStr;
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, Mutex};
6use std::time::{Duration, SystemTime};
7
8use ignore::WalkBuilder;
9
/// Coarse classification of a file, used for filtering and for display.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileCategory {
    /// Disk images (extensions such as `.dmg` or `.iso`).
    DiskImage,
    /// Compressed or bundled archives (extensions such as `.zip` or `.tar.gz`).
    Archive,
    /// Video files (extensions such as `.mp4` or `.mkv`).
    Video,
    /// Installer packages (extensions such as `.pkg`).
    Installer,
    /// Virtual machine disk images (extensions such as `.vmdk` or `.qcow2`).
    VmImage,
    /// Documents (currently `.pdf`).
    Document,
    /// Anything that did not match one of the categories above.
    Other,
}

impl std::fmt::Display for FileCategory {
    /// Writes the human-readable label of the category, e.g. `Disk Image`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::DiskImage => "Disk Image",
            Self::Archive => "Archive",
            Self::Video => "Video",
            Self::Installer => "Installer",
            Self::VmImage => "VM Image",
            Self::Document => "Document",
            Self::Other => "Other",
        };
        // The label is emitted verbatim; width/fill flags are not applied.
        f.write_str(label)
    }
}
35
36/// A file discovered by the finder.
37#[derive(Debug, Clone)]
38pub struct FoundFile {
39    pub path: PathBuf,
40    pub size: u64,
41    pub mtime: Option<SystemTime>,
42    pub category: FileCategory,
43    pub is_downloaded: bool,
44    pub download_source: Option<String>,
45}
46
47impl PartialEq for FoundFile {
48    fn eq(&self, other: &Self) -> bool {
49        self.size == other.size && self.path == other.path
50    }
51}
52
53impl Eq for FoundFile {}
54
55impl PartialOrd for FoundFile {
56    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
57        Some(self.cmp(other))
58    }
59}
60
61impl Ord for FoundFile {
62    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
63        self.size
64            .cmp(&other.size)
65            .then_with(|| self.path.cmp(&other.path))
66    }
67}
68
69/// Options for the file finder.
70#[derive(Debug, Clone)]
71pub struct FindOptions {
72    /// Root directories to scan.
73    pub root_paths: Vec<PathBuf>,
74    /// Minimum file size in bytes (files smaller than this are skipped).
75    pub min_size: Option<u64>,
76    /// Only include files older than this duration (based on mtime).
77    pub older_than: Option<Duration>,
78    /// Filter to specific file type categories.
79    pub file_types: Option<Vec<FileCategory>>,
80    /// Limit results to the top N largest files (uses BinaryHeap for memory efficiency).
81    pub max_results: Option<usize>,
82    /// Whether to validate file types with magic bytes (default: true).
83    pub validate_types: bool,
84    /// Additional paths to exclude (beyond the defaults).
85    pub extra_exclusions: Vec<String>,
86}
87
88impl Default for FindOptions {
89    fn default() -> Self {
90        Self {
91            root_paths: Vec::new(),
92            min_size: None,
93            older_than: None,
94            file_types: None,
95            max_results: None,
96            validate_types: true,
97            extra_exclusions: Vec::new(),
98        }
99    }
100}
101
102/// Paths that are always excluded from scanning.
///
/// `should_exclude` rejects every path that begins with one of these absolute
/// entries, independent of the caller-supplied options.
103const DEFAULT_EXCLUSIONS: &[&str] = &[
104    "/System",
105    "/usr/bin",
106    "/usr/lib",
107    "/usr/libexec",
108    "/usr/sbin",
109    "/usr/share",
110    "/bin",
111    "/sbin",
112    "/private/var/db",
113];
114
115/// Components that signal we are inside an excluded tree.
///
/// `should_exclude` rejects a path as soon as any single one of its components
/// equals one of these names exactly, wherever that component sits in the path.
116const EXCLUDED_COMPONENTS: &[&str] = &[".git"];
117
/// Reports whether `path` lives inside a `.app` bundle.
///
/// Only the strict ancestors of `path` are inspected, so the bundle directory
/// itself is not considered to be "inside" a bundle. The extension comparison
/// is case-sensitive.
fn is_inside_app_bundle(path: &Path) -> bool {
    path.ancestors()
        .skip(1) // the first ancestor is `path` itself
        .any(|dir| dir.extension().is_some_and(|ext| ext == "app"))
}
127
128/// Check if a path should be excluded from scanning.
129fn should_exclude(path: &Path, extra_exclusions: &[String]) -> bool {
130    let path_str = path.to_string_lossy();
131
132    // Check default absolute exclusions
133    for excl in DEFAULT_EXCLUSIONS {
134        if path_str.starts_with(excl) {
135            return true;
136        }
137    }
138
139    // Check extra exclusions
140    for excl in extra_exclusions {
141        if path_str.contains(excl.as_str()) {
142            return true;
143        }
144    }
145
146    // Check component-based exclusions
147    for component in path.components() {
148        let s = component.as_os_str().to_string_lossy();
149        for excl in EXCLUDED_COMPONENTS {
150            if s == *excl {
151                return true;
152            }
153        }
154    }
155
156    // Check if inside .app bundle
157    if is_inside_app_bundle(path) {
158        return true;
159    }
160
161    false
162}
163
164/// Detect file category by extension.
165pub fn detect_by_extension(path: &Path) -> FileCategory {
166    let name = path
167        .file_name()
168        .unwrap_or(OsStr::new(""))
169        .to_string_lossy()
170        .to_lowercase();
171
172    // Check compound extensions first
173    if name.ends_with(".tar.gz") || name.ends_with(".tar.bz2") || name.ends_with(".tar.xz") {
174        return FileCategory::Archive;
175    }
176
177    let ext = path
178        .extension()
179        .unwrap_or(OsStr::new(""))
180        .to_string_lossy()
181        .to_lowercase();
182
183    match ext.as_str() {
184        // Disk images
185        "dmg" | "iso" | "img" | "sparseimage" | "sparsebundle" => FileCategory::DiskImage,
186        // Archives
187        "zip" | "tar" | "tgz" | "rar" | "7z" | "xz" | "gz" | "bz2" => FileCategory::Archive,
188        // Videos
189        "mp4" | "mov" | "avi" | "mkv" | "wmv" | "flv" | "m4v" | "webm" => FileCategory::Video,
190        // Installers
191        "pkg" | "mpkg" => FileCategory::Installer,
192        // VM images
193        "vmdk" | "vdi" | "qcow2" | "vhd" => FileCategory::VmImage,
194        // Documents (large ones)
195        "pdf" => FileCategory::Document,
196        _ => FileCategory::Other,
197    }
198}
199
200/// Validate file type using magic bytes via the `infer` crate.
201/// If magic bytes disagree with extension-based detection, trust magic bytes.
202/// If infer returns None (unknown), keep the extension-based category.
203///
204/// Exception: DMG files have the `koly` signature at EOF-512, not at the header,
205/// so `infer` misclassifies them as Archive. For `.dmg` files, trust the extension.
206pub fn validate_by_magic_bytes(path: &Path, extension_category: FileCategory) -> FileCategory {
207    // DMG files: koly magic is at EOF-512, infer reads header and misclassifies as Archive.
208    // Trust the extension-based detection for .dmg files.
209    if extension_category == FileCategory::DiskImage {
210        let ext = path
211            .extension()
212            .unwrap_or_default()
213            .to_string_lossy()
214            .to_lowercase();
215        if ext == "dmg" {
216            return FileCategory::DiskImage;
217        }
218    }
219
220    let Ok(kind) = infer::get_from_path(path) else {
221        return extension_category;
222    };
223    let Some(kind) = kind else {
224        return extension_category;
225    };
226
227    // Map infer MIME types to our categories
228    let mime = kind.mime_type();
229    if mime.starts_with("video/") {
230        return FileCategory::Video;
231    }
232    match mime {
233        "application/x-apple-diskimage" => FileCategory::DiskImage,
234        "application/zip"
235        | "application/gzip"
236        | "application/x-tar"
237        | "application/x-rar-compressed"
238        | "application/x-7z-compressed"
239        | "application/x-bzip2"
240        | "application/x-xz"
241        | "application/zstd" => FileCategory::Archive,
242        "application/pdf" => FileCategory::Document,
243        "application/x-xar" => FileCategory::Installer, // .pkg files use xar format
244        _ => extension_category,
245    }
246}
247
248/// Read the quarantine xattr to detect downloaded files.
249/// Returns (is_downloaded, download_source).
250pub fn check_quarantine(path: &Path) -> (bool, Option<String>) {
251    match xattr::get(path, "com.apple.quarantine") {
252        Ok(Some(value)) => {
253            let value_str = String::from_utf8_lossy(&value);
254            // Format: flag;timestamp;agent_name;UUID
255            let parts: Vec<&str> = value_str.split(';').collect();
256            let source = parts
257                .get(2)
258                .map(|s| s.to_string())
259                .filter(|s| !s.is_empty());
260            (true, source)
261        }
262        _ => (false, None),
263    }
264}
265
/// Progress counter that can be shared by reference with a running scan.
pub struct ScanProgress {
    /// Running tally of files seen; updated atomically.
    pub files_scanned: std::sync::atomic::AtomicU64,
}

impl Default for ScanProgress {
    /// Same as [`ScanProgress::new`]: a counter starting at zero.
    fn default() -> Self {
        ScanProgress::new()
    }
}

impl ScanProgress {
    /// Creates a counter starting at zero.
    pub fn new() -> Self {
        let files_scanned = std::sync::atomic::AtomicU64::new(0);
        Self { files_scanned }
    }

    /// Returns the current tally (relaxed atomic load).
    pub fn count(&self) -> u64 {
        let relaxed = std::sync::atomic::Ordering::Relaxed;
        self.files_scanned.load(relaxed)
    }
}
289
290/// Find files matching the given options using parallel traversal.
291///
292/// Uses `ignore::WalkParallel` for 3-6x speedup over single-threaded traversal.
293/// Files are filtered by size during the walk to avoid collecting small files.
294/// Age is determined by `mtime` (NOT atime -- APFS disables atime by default).
295///
296/// Returns results sorted by size descending.
297pub fn find_files(options: &FindOptions, progress: Option<&ScanProgress>) -> Vec<FoundFile> {
298    let now = SystemTime::now();
299    let min_size = options.min_size.unwrap_or(0);
300    let extra_exclusions = options.extra_exclusions.clone();
301
302    // Build results collector based on whether we have a max_results limit
303    let collector: Arc<Mutex<TopNCollector>> =
304        Arc::new(Mutex::new(TopNCollector::new(options.max_results)));
305
306    for root in &options.root_paths {
307        if !root.exists() {
308            continue;
309        }
310
311        let mut builder = WalkBuilder::new(root);
312        builder
313            .hidden(false) // Don't skip hidden files -- users may want to find them
314            .git_ignore(true) // Respect .gitignore by default
315            .git_global(false)
316            .git_exclude(false)
317            .follow_links(false) // Don't follow symlinks (avoid infinite loops)
318            .threads(num_cpus());
319
320        let walker = builder.build_parallel();
321        let collector_ref = Arc::clone(&collector);
322        let extra_excl = extra_exclusions.clone();
323        let older_than = options.older_than;
324        let file_types = options.file_types.clone();
325
326        walker.run(|| {
327            let collector = Arc::clone(&collector_ref);
328            let extra_excl = extra_excl.clone();
329            let file_types = file_types.clone();
330
331            Box::new(move |entry| {
332                let Ok(entry) = entry else {
333                    return ignore::WalkState::Continue;
334                };
335
336                let path = entry.path();
337
338                // Skip excluded paths early (prevents descent into excluded trees)
339                if should_exclude(path, &extra_excl) {
340                    return ignore::WalkState::Skip;
341                }
342
343                // Only process files (not directories)
344                let Some(file_type) = entry.file_type() else {
345                    return ignore::WalkState::Continue;
346                };
347                if !file_type.is_file() {
348                    return ignore::WalkState::Continue;
349                }
350
351                // Increment progress counter
352                if let Some(prog) = progress {
353                    prog.files_scanned
354                        .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
355                }
356
357                // Get metadata for size and mtime
358                let Ok(meta) = entry.metadata() else {
359                    return ignore::WalkState::Continue;
360                };
361
362                let size = meta.len();
363
364                // Filter by minimum size
365                if size < min_size {
366                    return ignore::WalkState::Continue;
367                }
368
369                // Filter by age (mtime)
370                if let Some(max_age) = older_than
371                    && let Ok(mtime) = meta.modified()
372                    && let Ok(age) = now.duration_since(mtime)
373                    && age < max_age
374                {
375                    return ignore::WalkState::Continue;
376                }
377
378                // Detect category by extension (fast, no I/O)
379                let category = detect_by_extension(path);
380
381                // Filter by file type if specified
382                if let Some(ref types) = file_types {
383                    if !types.contains(&category) && category != FileCategory::Other {
384                        return ignore::WalkState::Continue;
385                    }
386                    // If category is Other and we have type filters, still skip
387                    if category == FileCategory::Other && !types.contains(&FileCategory::Other) {
388                        return ignore::WalkState::Continue;
389                    }
390                }
391
392                let mtime = meta.modified().ok();
393
394                let file = FoundFile {
395                    path: path.to_path_buf(),
396                    size,
397                    mtime,
398                    category,
399                    is_downloaded: false,
400                    download_source: None,
401                };
402
403                if let Ok(mut coll) = collector.lock() {
404                    coll.push(file);
405                }
406
407                ignore::WalkState::Continue
408            })
409        });
410    }
411
412    let inner = Arc::try_unwrap(collector)
413        .map(|mutex| mutex.into_inner().unwrap_or_else(|e| e.into_inner()))
414        .unwrap_or_else(|arc| {
415            let lock = arc.lock().unwrap();
416            lock.clone()
417        });
418    let mut results = inner.into_vec();
419
420    // Second pass: validate types with magic bytes and check quarantine xattr
421    if options.validate_types {
422        for file in &mut results {
423            file.category = validate_by_magic_bytes(&file.path, file.category);
424        }
425    }
426
427    // Check quarantine xattr for download detection
428    for file in &mut results {
429        let (is_downloaded, source) = check_quarantine(&file.path);
430        file.is_downloaded = is_downloaded;
431        file.download_source = source;
432    }
433
434    // Re-filter by file type after magic byte validation (category may have changed)
435    if let Some(ref types) = options.file_types {
436        results.retain(|f| types.contains(&f.category));
437    }
438
439    // Sort by size descending
440    results.sort_by(|a, b| b.size.cmp(&a.size));
441    results
442}
443
444/// Memory-efficient top-N collector using a BinaryHeap (min-heap).
445/// When max is set, keeps only the N largest items. O(n log k) instead of O(n log n).
446/// When max is None, collects all items into a Vec.
447#[derive(Clone)]
448struct TopNCollector {
449    max: Option<usize>,
450    heap: BinaryHeap<Reverse<FoundFile>>,
451    vec: Vec<FoundFile>,
452}
453
454impl TopNCollector {
455    fn new(max: Option<usize>) -> Self {
456        Self {
457            max,
458            heap: BinaryHeap::new(),
459            vec: Vec::new(),
460        }
461    }
462
463    fn push(&mut self, file: FoundFile) {
464        match self.max {
465            Some(n) if n > 0 => {
466                self.heap.push(Reverse(file));
467                if self.heap.len() > n {
468                    self.heap.pop(); // Remove smallest
469                }
470            }
471            _ => {
472                self.vec.push(file);
473            }
474        }
475    }
476
477    fn into_vec(self) -> Vec<FoundFile> {
478        match self.max {
479            Some(_) => self
480                .heap
481                .into_sorted_vec()
482                .into_iter()
483                .map(|Reverse(f)| f)
484                .collect(),
485            None => self.vec,
486        }
487    }
488}
489
/// Picks the worker-thread count for parallel walking.
///
/// Uses the parallelism reported by the standard library and falls back to 4
/// when that cannot be determined.
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism.get(),
        Err(_) => 4,
    }
}
496
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates an empty scratch directory for one test.
    ///
    /// The process id is part of the name so that concurrent test runs (for
    /// example two checkouts tested at the same time) do not delete each
    /// other's fixtures.
    fn setup_test_dir(name: &str) -> PathBuf {
        let tmp = std::env::temp_dir().join(format!(
            "diskforge_test_ff_{name}_{}",
            std::process::id()
        ));
        let _ = fs::remove_dir_all(&tmp);
        fs::create_dir_all(&tmp).unwrap();
        tmp
    }

    /// Writes a zero-filled file of `size` bytes at `dir/name`, creating any
    /// missing parent directories, and returns its path.
    fn create_file(dir: &Path, name: &str, size: usize) -> PathBuf {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&path, vec![0u8; size]).unwrap();
        path
    }

    /// Baseline options: scan `root`, accept any non-empty file, and skip
    /// magic-byte validation (the fixtures are zero-filled).
    fn options_for(root: &Path) -> FindOptions {
        FindOptions {
            root_paths: vec![root.to_path_buf()],
            min_size: Some(1),
            validate_types: false,
            ..Default::default()
        }
    }

    /// Asserts that every name in `names` is classified as `expected`.
    fn assert_categories(expected: FileCategory, names: &[&str]) {
        for name in names {
            assert_eq!(
                detect_by_extension(Path::new(name)),
                expected,
                "unexpected category for {name}"
            );
        }
    }

    /// Builds a `FoundFile` named `file{index}` of `(index + 1) * 100` bytes.
    fn sized_file(index: u64) -> FoundFile {
        FoundFile {
            path: PathBuf::from(format!("file{index}")),
            size: (index + 1) * 100,
            mtime: None,
            category: FileCategory::Other,
            is_downloaded: false,
            download_source: None,
        }
    }

    #[test]
    fn detect_extension_disk_image() {
        assert_categories(
            FileCategory::DiskImage,
            &["file.dmg", "file.iso", "file.IMG"],
        );
    }

    #[test]
    fn detect_extension_archive() {
        assert_categories(
            FileCategory::Archive,
            &["file.zip", "file.tar.gz", "file.7z", "file.rar"],
        );
    }

    #[test]
    fn detect_extension_video() {
        assert_categories(FileCategory::Video, &["file.mp4", "file.mkv", "movie.MOV"]);
    }

    #[test]
    fn detect_extension_installer() {
        assert_categories(FileCategory::Installer, &["setup.pkg", "setup.mpkg"]);
    }

    #[test]
    fn detect_extension_vm_image() {
        assert_categories(FileCategory::VmImage, &["disk.vmdk", "disk.qcow2"]);
    }

    #[test]
    fn detect_extension_document() {
        assert_categories(FileCategory::Document, &["doc.pdf"]);
    }

    #[test]
    fn detect_extension_other() {
        assert_categories(FileCategory::Other, &["file.txt", "file.rs", "noext"]);
    }

    #[test]
    fn find_files_size_filter() {
        let tmp = setup_test_dir("size_filter");
        create_file(&tmp, "small.zip", 100);
        create_file(&tmp, "big.zip", 10_000);
        create_file(&tmp, "huge.dmg", 100_000);

        let options = FindOptions {
            min_size: Some(5_000),
            ..options_for(&tmp)
        };

        let results = find_files(&options, None);
        assert_eq!(results.len(), 2, "Should find 2 files above 5KB");
        assert!(
            results[0].size >= results[1].size,
            "Should be sorted by size desc"
        );

        fs::remove_dir_all(&tmp).ok();
    }

    #[test]
    fn find_files_max_results() {
        let tmp = setup_test_dir("max_results");
        for i in 0..10 {
            create_file(&tmp, &format!("file{i}.zip"), (i + 1) * 1000);
        }

        let options = FindOptions {
            max_results: Some(3),
            ..options_for(&tmp)
        };

        let results = find_files(&options, None);
        assert_eq!(results.len(), 3, "Should return exactly 3 results");
        // Must be exactly the 3 largest files, largest first.
        let sizes: Vec<u64> = results.iter().map(|f| f.size).collect();
        assert_eq!(sizes, vec![10_000, 9_000, 8_000]);

        fs::remove_dir_all(&tmp).ok();
    }

    #[test]
    fn find_files_type_filter() {
        let tmp = setup_test_dir("type_filter");
        create_file(&tmp, "movie.mp4", 5000);
        create_file(&tmp, "archive.zip", 5000);
        create_file(&tmp, "image.dmg", 5000);

        let options = FindOptions {
            file_types: Some(vec![FileCategory::Video]),
            ..options_for(&tmp)
        };

        let results = find_files(&options, None);
        assert_eq!(results.len(), 1, "Should find only the video");
        assert_eq!(results[0].category, FileCategory::Video);

        fs::remove_dir_all(&tmp).ok();
    }

    #[test]
    fn find_files_excludes_git_objects() {
        let tmp = setup_test_dir("git_objects");
        create_file(&tmp, ".git/objects/pack/bigpack.zip", 10_000);
        create_file(&tmp, "normal.zip", 10_000);

        let results = find_files(&options_for(&tmp), None);
        assert_eq!(results.len(), 1, "Should exclude .git/objects file");
        assert!(!results[0].path.to_string_lossy().contains(".git"));

        fs::remove_dir_all(&tmp).ok();
    }

    #[test]
    fn find_files_excludes_app_bundle_contents() {
        let tmp = setup_test_dir("app_bundle");
        create_file(&tmp, "SomeApp.app/Contents/MacOS/binary", 10_000);
        create_file(&tmp, "outside.dmg", 10_000);

        let results = find_files(&options_for(&tmp), None);
        assert_eq!(results.len(), 1, "Should exclude file inside .app bundle");
        assert!(results[0].path.to_string_lossy().contains("outside.dmg"));

        fs::remove_dir_all(&tmp).ok();
    }

    #[test]
    fn find_files_progress_counter() {
        let tmp = setup_test_dir("progress");
        create_file(&tmp, "a.zip", 1000);
        create_file(&tmp, "b.zip", 2000);
        create_file(&tmp, "c.zip", 3000);

        let progress = ScanProgress::new();
        find_files(&options_for(&tmp), Some(&progress));
        assert!(
            progress.count() >= 3,
            "Should have scanned at least 3 files"
        );

        fs::remove_dir_all(&tmp).ok();
    }

    #[test]
    fn dmg_not_misclassified_as_archive() {
        // Create a fake .dmg file (infer would see arbitrary bytes, not the koly trailer)
        let tmp = setup_test_dir("dmg_fix");
        let dmg_path = create_file(&tmp, "test.dmg", 5000);

        // Extension-based should be DiskImage
        assert_eq!(
            detect_by_extension(&dmg_path),
            FileCategory::DiskImage,
            "Extension detection should return DiskImage for .dmg"
        );

        // Magic-byte validation should NOT override .dmg to Archive
        let validated = validate_by_magic_bytes(&dmg_path, FileCategory::DiskImage);
        assert_eq!(
            validated,
            FileCategory::DiskImage,
            "DMG files must stay as DiskImage even after magic-byte validation"
        );

        fs::remove_dir_all(&tmp).ok();
    }

    #[test]
    fn top_n_collector_unlimited() {
        let mut coll = TopNCollector::new(None);
        for i in 0..5 {
            coll.push(sized_file(i));
        }
        let results = coll.into_vec();
        assert_eq!(results.len(), 5);
    }

    #[test]
    fn top_n_collector_limited() {
        let mut coll = TopNCollector::new(Some(2));
        for i in 0..5 {
            coll.push(sized_file(i));
        }
        let results = coll.into_vec();
        assert_eq!(results.len(), 2);
        // Should be the 2 largest (400 and 500)
        assert!(results.iter().all(|f| f.size >= 400));
    }
}