heroforge_core/fs/
find.rs

1//! File finding and pattern matching operations.
2//!
3//! This module provides a powerful find API for searching files in Heroforge repositories
4//! using glob patterns, ignore rules, and various filters.
5//!
6//! # Examples
7//!
8//! ```no_run
9//! use heroforge_core::Repository;
10//! use heroforge_core::fs::Find;
11//!
12//! let repo = Repository::open("project.forge")?;
13//!
14//! // Find all Rust files
15//! let rust_files = Find::new(&repo)
16//!     .pattern("**/*.rs")
17//!     .paths()?;
18//!
19//! // Find with filters
20//! let filtered = Find::new(&repo)
21//!     .pattern("**/*.rs")
22//!     .ignore("target/**")
23//!     .ignore_hidden()
24//!     .max_depth(5)
25//!     .paths()?;
26//! # Ok::<(), heroforge_core::FossilError>(())
27//! ```
28
29use crate::error::Result;
30use crate::repo::Repository;
31use std::collections::HashSet;
32
/// Unix-style permission bits for a file.
///
/// The nine classic `rwx` flags (owner, group, others) are each stored as a
/// separate boolean, so they can be inspected or modified individually.
///
/// # Examples
///
/// ```
/// use heroforge_core::fs::Permissions;
///
/// // Build from an octal mode
/// let perms = Permissions::from_octal(0o755);
/// assert_eq!(perms.to_string_repr(), "rwxr-xr-x");
///
/// // Or start from a preset
/// let exec = Permissions::executable();
/// assert_eq!(exec.to_octal(), 0o755);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Permissions {
    /// Owner may read
    pub owner_read: bool,
    /// Owner may write
    pub owner_write: bool,
    /// Owner may execute
    pub owner_exec: bool,
    /// Group may read
    pub group_read: bool,
    /// Group may write
    pub group_write: bool,
    /// Group may execute
    pub group_exec: bool,
    /// Others may read
    pub other_read: bool,
    /// Others may write
    pub other_write: bool,
    /// Others may execute
    pub other_exec: bool,
}

impl Permissions {
    /// Lists every flag with its octal mask and display character, ordered
    /// from the most significant bit (owner read) to the least (other exec).
    fn flag_table(&self) -> [(bool, u32, char); 9] {
        [
            (self.owner_read, 0o400, 'r'),
            (self.owner_write, 0o200, 'w'),
            (self.owner_exec, 0o100, 'x'),
            (self.group_read, 0o040, 'r'),
            (self.group_write, 0o020, 'w'),
            (self.group_exec, 0o010, 'x'),
            (self.other_read, 0o004, 'r'),
            (self.other_write, 0o002, 'w'),
            (self.other_exec, 0o001, 'x'),
        ]
    }

    /// Build permissions from an octal mode such as `0o755`.
    ///
    /// Only the low nine bits are inspected; any higher bits are ignored.
    pub fn from_octal(mode: u32) -> Self {
        let is_set = |mask: u32| mode & mask != 0;
        Self {
            owner_read: is_set(0o400),
            owner_write: is_set(0o200),
            owner_exec: is_set(0o100),
            group_read: is_set(0o040),
            group_write: is_set(0o020),
            group_exec: is_set(0o010),
            other_read: is_set(0o004),
            other_write: is_set(0o002),
            other_exec: is_set(0o001),
        }
    }

    /// Pack the flags back into an octal mode (`0o000`..=`0o777`).
    pub fn to_octal(&self) -> u32 {
        self.flag_table()
            .iter()
            .filter(|(enabled, _, _)| *enabled)
            .fold(0u32, |mode, (_, mask, _)| mode | mask)
    }

    /// Render the flags as a nine-character string (e.g., "rwxr-xr-x").
    ///
    /// A cleared flag is shown as `-`.
    pub fn to_string_repr(&self) -> String {
        self.flag_table()
            .iter()
            .map(|&(enabled, _, symbol)| if enabled { symbol } else { '-' })
            .collect()
    }

    /// Preset for a regular file (644 = rw-r--r--).
    pub fn file() -> Self {
        Self::from_octal(0o644)
    }

    /// Preset for an executable file (755 = rwxr-xr-x).
    pub fn executable() -> Self {
        Self::from_octal(0o755)
    }

    /// Preset for a read-only file (444 = r--r--r--).
    pub fn readonly() -> Self {
        Self::from_octal(0o444)
    }
}
150
/// Kind of entry a repository path refers to.
///
/// Two values compare equal only when they are the same variant and, for
/// symlinks, carry the same target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileType {
    /// Ordinary file
    Regular,
    /// File flagged as executable
    Executable,
    /// Symbolic link; the payload is the link's target path
    Symlink(String),
}
161
162/// Extended file information including permissions and type.
163#[derive(Debug, Clone)]
164pub struct FileEntry {
165    /// File path relative to repository root
166    pub path: String,
167    /// SHA3-256 hash of file content
168    pub hash: String,
169    /// Type of file (regular, executable, symlink)
170    pub file_type: FileType,
171    /// Unix permissions (if tracked)
172    pub permissions: Option<Permissions>,
173    /// File size in bytes
174    pub size: Option<usize>,
175}
176
177/// Result of a find operation with metadata.
178#[derive(Debug, Clone)]
179pub struct FindResult {
180    /// Matched files with metadata
181    pub files: Vec<FileEntry>,
182    /// Total number of matches
183    pub count: usize,
184    /// Number of directories traversed during search
185    pub dirs_traversed: usize,
186}
187
188/// File finder with pattern matching support.
189///
190/// Create via [`Find::new()`] and chain methods to configure the search.
191///
192/// # Examples
193///
194/// ## Basic Pattern Matching
195///
196/// ```no_run
197/// use heroforge_core::Repository;
198/// use heroforge_core::fs::Find;
199///
200/// let repo = Repository::open("project.forge")?;
201///
202/// // Find all Rust source files
203/// let rust_files = Find::new(&repo)
204///     .pattern("**/*.rs")
205///     .paths()?;
206///
207/// // Find multiple patterns
208/// let source_files = Find::new(&repo)
209///     .patterns(&["**/*.rs", "**/*.toml", "**/*.md"])
210///     .paths()?;
211/// # Ok::<(), heroforge_core::FossilError>(())
212/// ```
213///
214/// ## Ignore Patterns
215///
216/// ```no_run
217/// # use heroforge_core::Repository;
218/// # use heroforge_core::fs::Find;
219/// # let repo = Repository::open("project.forge")?;
220/// // Exclude build artifacts
221/// let files = Find::new(&repo)
222///     .pattern("**/*.rs")
223///     .ignore("target/**")
224///     .ignore("**/generated/**")
225///     .paths()?;
226///
227/// // Use common gitignore patterns
228/// let clean = Find::new(&repo)
229///     .pattern("**/*")
230///     .use_gitignore()
231///     .ignore_hidden()
232///     .paths()?;
233/// # Ok::<(), heroforge_core::FossilError>(())
234/// ```
235///
236/// ## Directory and Depth Limits
237///
238/// ```no_run
239/// # use heroforge_core::Repository;
240/// # use heroforge_core::fs::Find;
241/// # let repo = Repository::open("project.forge")?;
242/// // Search only in src directory
243/// let src_files = Find::new(&repo)
244///     .in_dir("src")
245///     .pattern("**/*.rs")
246///     .paths()?;
247///
248/// // Limit depth to 2 levels
249/// let shallow = Find::new(&repo)
250///     .pattern("**/*")
251///     .max_depth(2)
252///     .paths()?;
253/// # Ok::<(), heroforge_core::FossilError>(())
254/// ```
255pub struct Find<'a> {
256    repo: &'a Repository,
257    base_commit: Option<String>,
258    patterns: Vec<String>,
259    ignore_patterns: Vec<String>,
260    ignore_hidden: bool,
261    ignore_case: bool,
262    max_depth: Option<usize>,
263    file_type_filter: Option<FileType>,
264    min_size: Option<usize>,
265    max_size: Option<usize>,
266    base_dir: Option<String>,
267}
268
269impl<'a> Find<'a> {
270    /// Create a new Find instance for the given repository.
271    pub fn new(repo: &'a Repository) -> Self {
272        Self {
273            repo,
274            base_commit: None,
275            patterns: Vec::new(),
276            ignore_patterns: Vec::new(),
277            ignore_hidden: false,
278            ignore_case: false,
279            max_depth: None,
280            file_type_filter: None,
281            min_size: None,
282            max_size: None,
283            base_dir: None,
284        }
285    }
286
287    /// Search at a specific commit hash.
288    pub fn at_commit(mut self, hash: &str) -> Self {
289        self.base_commit = Some(hash.to_string());
290        self
291    }
292
293    /// Search on trunk (default).
294    pub fn on_trunk(mut self) -> Self {
295        self.base_commit = None;
296        self
297    }
298
299    /// Search on a specific branch.
300    pub fn on_branch(mut self, branch: &str) -> Result<Self> {
301        let tip = self.repo.branch_tip_internal(branch)?;
302        self.base_commit = Some(tip.hash);
303        Ok(self)
304    }
305
306    /// Add a glob pattern to match files.
307    ///
308    /// Supports standard glob syntax:
309    /// - `*` matches any sequence of characters in a path segment
310    /// - `**` matches any sequence of path segments
311    /// - `?` matches any single character
312    /// - `[abc]` matches any character in the brackets
313    pub fn pattern(mut self, pattern: &str) -> Self {
314        self.patterns.push(pattern.to_string());
315        self
316    }
317
318    /// Add multiple glob patterns at once.
319    pub fn patterns(mut self, patterns: &[&str]) -> Self {
320        for p in patterns {
321            self.patterns.push(p.to_string());
322        }
323        self
324    }
325
326    /// Exclude files matching this pattern.
327    pub fn ignore(mut self, pattern: &str) -> Self {
328        self.ignore_patterns.push(pattern.to_string());
329        self
330    }
331
332    /// Add multiple ignore patterns at once.
333    pub fn ignore_patterns(mut self, patterns: &[&str]) -> Self {
334        for p in patterns {
335            self.ignore_patterns.push(p.to_string());
336        }
337        self
338    }
339
340    /// Exclude hidden files (files starting with `.`).
341    pub fn ignore_hidden(mut self) -> Self {
342        self.ignore_hidden = true;
343        self
344    }
345
346    /// Apply common gitignore-style patterns.
347    pub fn use_gitignore(mut self) -> Self {
348        self.ignore_patterns.extend(vec![
349            ".git/**".to_string(),
350            ".gitignore".to_string(),
351            "node_modules/**".to_string(),
352            "target/**".to_string(),
353            "*.pyc".to_string(),
354            "__pycache__/**".to_string(),
355            ".DS_Store".to_string(),
356            "*.swp".to_string(),
357            "*.swo".to_string(),
358            "*~".to_string(),
359        ]);
360        self
361    }
362
363    /// Enable case-insensitive pattern matching.
364    pub fn ignore_case(mut self) -> Self {
365        self.ignore_case = true;
366        self
367    }
368
369    /// Limit search to a maximum directory depth.
370    pub fn max_depth(mut self, depth: usize) -> Self {
371        self.max_depth = Some(depth);
372        self
373    }
374
375    /// Only match regular files (exclude executables and symlinks).
376    pub fn files_only(mut self) -> Self {
377        self.file_type_filter = Some(FileType::Regular);
378        self
379    }
380
381    /// Only match executable files.
382    pub fn executables_only(mut self) -> Self {
383        self.file_type_filter = Some(FileType::Executable);
384        self
385    }
386
387    /// Only match symbolic links.
388    pub fn symlinks_only(mut self) -> Self {
389        self.file_type_filter = Some(FileType::Symlink(String::new()));
390        self
391    }
392
393    /// Filter to files at least this size (in bytes).
394    pub fn min_size(mut self, bytes: usize) -> Self {
395        self.min_size = Some(bytes);
396        self
397    }
398
399    /// Filter to files at most this size (in bytes).
400    pub fn max_size(mut self, bytes: usize) -> Self {
401        self.max_size = Some(bytes);
402        self
403    }
404
405    /// Restrict search to a specific directory.
406    pub fn in_dir(mut self, dir: &str) -> Self {
407        self.base_dir = Some(dir.trim_end_matches('/').to_string());
408        self
409    }
410
411    /// Execute the search and return full results with metadata.
412    pub fn execute(&self) -> Result<FindResult> {
413        let commit_hash = if let Some(ref hash) = self.base_commit {
414            hash.clone()
415        } else {
416            let tip = self.repo.branch_tip_internal("trunk")?;
417            tip.hash
418        };
419
420        let all_files = self.repo.list_files_internal(&commit_hash)?;
421        let mut matched_files = Vec::new();
422        let mut dirs_seen = HashSet::new();
423
424        let match_patterns: Vec<glob::Pattern> = self
425            .patterns
426            .iter()
427            .filter_map(|p| {
428                let p = if self.ignore_case {
429                    p.to_lowercase()
430                } else {
431                    p.clone()
432                };
433                glob::Pattern::new(&p).ok()
434            })
435            .collect();
436
437        let ignore_patterns: Vec<glob::Pattern> = self
438            .ignore_patterns
439            .iter()
440            .filter_map(|p| {
441                let p = if self.ignore_case {
442                    p.to_lowercase()
443                } else {
444                    p.clone()
445                };
446                glob::Pattern::new(&p).ok()
447            })
448            .collect();
449
450        for file in all_files {
451            let file_path = if self.ignore_case {
452                file.name.to_lowercase()
453            } else {
454                file.name.clone()
455            };
456
457            if let Some(idx) = file.name.rfind('/') {
458                dirs_seen.insert(file.name[..idx].to_string());
459            }
460
461            if let Some(ref base) = self.base_dir {
462                if !file.name.starts_with(base) && !file.name.starts_with(&format!("{}/", base)) {
463                    continue;
464                }
465            }
466
467            if let Some(max_depth) = self.max_depth {
468                let depth = file.name.matches('/').count();
469                let base_depth = self
470                    .base_dir
471                    .as_ref()
472                    .map(|b| b.matches('/').count())
473                    .unwrap_or(0);
474                if depth - base_depth > max_depth {
475                    continue;
476                }
477            }
478
479            if self.ignore_hidden {
480                let file_name = file.name.rsplit('/').next().unwrap_or(&file.name);
481                if file_name.starts_with('.') {
482                    continue;
483                }
484            }
485
486            let ignored = ignore_patterns.iter().any(|p| p.matches(&file_path));
487            if ignored {
488                continue;
489            }
490
491            let matches = if match_patterns.is_empty() {
492                true
493            } else {
494                match_patterns.iter().any(|p| p.matches(&file_path))
495            };
496
497            if matches {
498                matched_files.push(FileEntry {
499                    path: file.name.clone(),
500                    hash: file.hash.clone(),
501                    file_type: FileType::Regular,
502                    permissions: file.permissions.as_ref().map(|p| {
503                        Permissions::from_octal(u32::from_str_radix(p, 8).unwrap_or(0o644))
504                    }),
505                    size: file.size,
506                });
507            }
508        }
509
510        Ok(FindResult {
511            count: matched_files.len(),
512            files: matched_files,
513            dirs_traversed: dirs_seen.len(),
514        })
515    }
516
517    /// Execute and return just the file paths.
518    pub fn paths(&self) -> Result<Vec<String>> {
519        Ok(self.execute()?.files.into_iter().map(|f| f.path).collect())
520    }
521
522    /// Execute and return only the count of matching files.
523    pub fn count(&self) -> Result<usize> {
524        Ok(self.execute()?.count)
525    }
526}
527
528/// Convenience function to find files matching a pattern.
529///
530/// # Examples
531///
532/// ```no_run
533/// use heroforge_core::Repository;
534/// use heroforge_core::fs::find;
535///
536/// let repo = Repository::open("project.forge")?;
537/// let rust_files = find(&repo, "**/*.rs")?;
538/// # Ok::<(), heroforge_core::FossilError>(())
539/// ```
540pub fn find(repo: &Repository, pattern: &str) -> Result<Vec<String>> {
541    Find::new(repo).pattern(pattern).paths()
542}
543
544/// Convenience function to count files matching a pattern.
545pub fn count(repo: &Repository, pattern: &str) -> Result<usize> {
546    Find::new(repo).pattern(pattern).count()
547}
548
549/// Check if a file exists at the given path.
550pub fn exists(repo: &Repository, path: &str) -> Result<bool> {
551    let tip = repo.branch_tip_internal("trunk")?;
552    let files = repo.list_files_internal(&tip.hash)?;
553    Ok(files.iter().any(|f| f.name == path))
554}
555
556/// Check if a path represents a directory.
557pub fn is_dir(repo: &Repository, path: &str) -> Result<bool> {
558    let tip = repo.branch_tip_internal("trunk")?;
559    let files = repo.list_files_internal(&tip.hash)?;
560    let prefix = format!("{}/", path.trim_end_matches('/'));
561    Ok(files.iter().any(|f| f.name.starts_with(&prefix)))
562}
563
564/// Get detailed information about a file.
565pub fn stat(repo: &Repository, path: &str) -> Result<Option<FileEntry>> {
566    let tip = repo.branch_tip_internal("trunk")?;
567    let files = repo.list_files_internal(&tip.hash)?;
568
569    for file in files {
570        if file.name == path {
571            let content = repo.read_file_internal(&tip.hash, path)?;
572            let file_type = if let Ok(text) = String::from_utf8(content.clone()) {
573                if text.starts_with("link ") {
574                    FileType::Symlink(text[5..].trim().to_string())
575                } else {
576                    FileType::Regular
577                }
578            } else {
579                FileType::Regular
580            };
581
582            return Ok(Some(FileEntry {
583                path: file.name,
584                hash: file.hash,
585                file_type,
586                permissions: file
587                    .permissions
588                    .as_ref()
589                    .map(|p| Permissions::from_octal(u32::from_str_radix(p, 8).unwrap_or(0o644))),
590                size: Some(content.len()),
591            }));
592        }
593    }
594
595    Ok(None)
596}
597
598/// Calculate total size of files matching a glob pattern.
599pub fn du(repo: &Repository, pattern: &str) -> Result<usize> {
600    let tip = repo.branch_tip_internal("trunk")?;
601    let files = repo.find_files_internal(&tip.hash, pattern)?;
602    let mut total = 0;
603
604    for file in files {
605        if let Ok(content) = repo.read_file_internal(&tip.hash, &file.name) {
606            total += content.len();
607        }
608    }
609
610    Ok(total)
611}
612
613/// List all symbolic links in the repository.
614pub fn list_symlinks(repo: &Repository) -> Result<Vec<(String, String)>> {
615    let tip = repo.branch_tip_internal("trunk")?;
616    let files = repo.list_files_internal(&tip.hash)?;
617    let mut symlinks = Vec::new();
618
619    for file in files {
620        if let Ok(content) = repo.read_file_internal(&tip.hash, &file.name) {
621            if let Ok(text) = String::from_utf8(content) {
622                if text.starts_with("link ") {
623                    let target = text[5..].trim().to_string();
624                    symlinks.push((file.name, target));
625                }
626            }
627        }
628    }
629
630    Ok(symlinks)
631}
632
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a regular-file entry with default file permissions.
    fn regular_entry(path: &str, hash: &str, size: usize) -> FileEntry {
        FileEntry {
            path: path.to_string(),
            hash: hash.to_string(),
            file_type: FileType::Regular,
            permissions: Some(Permissions::file()),
            size: Some(size),
        }
    }

    /// Builds a `Permissions` value with every flag set to `value`.
    fn uniform_permissions(value: bool) -> Permissions {
        Permissions {
            owner_read: value,
            owner_write: value,
            owner_exec: value,
            group_read: value,
            group_write: value,
            group_exec: value,
            other_read: value,
            other_write: value,
            other_exec: value,
        }
    }

    #[test]
    fn test_permissions_from_octal() {
        // 0o755 = rwxr-xr-x: only group/other write are cleared.
        let expected = Permissions {
            group_write: false,
            other_write: false,
            ..uniform_permissions(true)
        };
        assert_eq!(Permissions::from_octal(0o755), expected);
    }

    #[test]
    fn test_permissions_to_octal() {
        let mode = Permissions::from_octal(0o644).to_octal();
        assert_eq!(mode, 0o644);
    }

    #[test]
    fn test_permissions_string_repr() {
        let cases = [
            (0o755, "rwxr-xr-x"),
            (0o644, "rw-r--r--"),
            (0o600, "rw-------"),
        ];
        for (mode, text) in cases {
            assert_eq!(Permissions::from_octal(mode).to_string_repr(), text);
        }
    }

    #[test]
    fn test_permissions_presets() {
        let presets = [
            (Permissions::file(), 0o644),
            (Permissions::executable(), 0o755),
            (Permissions::readonly(), 0o444),
        ];
        for (preset, mode) in presets {
            assert_eq!(preset.to_octal(), mode);
        }
    }

    #[test]
    fn test_permissions_roundtrip() {
        // digit * 0o111 yields 0o000, 0o111, ..., 0o777.
        for mode in (0..8u32).map(|digit| digit * 0o111) {
            assert_eq!(Permissions::from_octal(mode).to_octal(), mode);
        }
    }

    #[test]
    fn test_permissions_all_bits() {
        let perms = Permissions::from_octal(0o777);
        assert_eq!(perms, uniform_permissions(true));
        assert_eq!(perms.to_string_repr(), "rwxrwxrwx");
    }

    #[test]
    fn test_permissions_no_bits() {
        let perms = Permissions::from_octal(0o000);
        assert_eq!(perms, uniform_permissions(false));
        assert_eq!(perms.to_string_repr(), "---------");
    }

    #[test]
    fn test_file_type_equality() {
        let link = |target: &str| FileType::Symlink(target.to_string());

        assert_eq!(FileType::Regular, FileType::Regular);
        assert_eq!(FileType::Executable, FileType::Executable);
        assert_eq!(link("target"), link("target"));
        assert_ne!(FileType::Regular, FileType::Executable);
        assert_ne!(link("a"), link("b"));
    }

    #[test]
    fn test_find_result_empty() {
        let result = FindResult {
            files: Vec::new(),
            count: 0,
            dirs_traversed: 0,
        };
        assert_eq!(result.count, 0);
        assert!(result.files.is_empty());
    }

    #[test]
    fn test_find_result_with_files() {
        let files = vec![
            regular_entry("src/main.rs", "abc123", 100),
            regular_entry("src/lib.rs", "def456", 200),
        ];
        let result = FindResult {
            files,
            count: 2,
            dirs_traversed: 1,
        };
        assert_eq!(result.count, 2);
        assert_eq!(result.files.len(), 2);
        assert_eq!(result.dirs_traversed, 1);
    }

    #[test]
    fn test_file_entry_with_symlink() {
        let entry = FileEntry {
            path: "link".to_string(),
            hash: "hash".to_string(),
            file_type: FileType::Symlink("target/path".to_string()),
            permissions: None,
            size: None,
        };
        assert_eq!(entry.path, "link");
        match entry.file_type {
            FileType::Symlink(target) => assert_eq!(target, "target/path"),
            _ => panic!("Expected symlink"),
        }
    }
}