// gravityfile_core/config.rs

1//! Scan configuration types.
2
3use std::path::PathBuf;
4
5use derive_builder::Builder;
6use globset::{Glob, GlobSet, GlobSetBuilder};
7use serde::{Deserialize, Serialize};
8
/// Configuration for scanning operations.
///
/// Instances can be created with [`ScanConfig::new`], through the derived
/// `ScanConfigBuilder` (see [`ScanConfig::builder`]), or by deserializing with
/// serde. Every field except `root` has a default in both the builder and the
/// serde representation.
///
/// The compiled ignore set is *not* populated by the builder or by
/// deserialization; call `compile_patterns` after constructing the config that
/// way (or after editing `ignore_patterns`) to build it.
#[derive(Debug, Clone, Builder, Serialize, Deserialize)]
#[builder(setter(into), build_fn(validate = "Self::validate"))]
pub struct ScanConfig {
    /// Root path to scan.
    ///
    /// Required: builder validation rejects a missing or empty path.
    pub root: PathBuf,

    /// Follow symbolic links. Defaults to `false`.
    #[builder(default = "false")]
    #[serde(default)]
    pub follow_symlinks: bool,

    /// Cross filesystem boundaries. Defaults to `false`.
    #[builder(default = "false")]
    #[serde(default)]
    pub cross_filesystems: bool,

    /// Use apparent size vs disk usage. Defaults to `false`.
    #[builder(default = "false")]
    #[serde(default)]
    pub apparent_size: bool,

    /// Maximum depth to traverse (None = unlimited). Defaults to `None`.
    #[builder(default)]
    #[serde(default)]
    pub max_depth: Option<u32>,

    /// Patterns to ignore, written in the glob syntax accepted by
    /// `globset::Glob`. Defaults to an empty list.
    ///
    /// Patterns that fail to parse are skipped silently when matching.
    // NOTE(review): the original comment called this "gitignore-style"; globset's
    // glob dialect may differ from gitignore semantics — confirm before relying on it.
    #[builder(default)]
    #[serde(default)]
    pub ignore_patterns: Vec<String>,

    /// Number of threads for scanning (0 = auto-detect). Defaults to `0`.
    // NOTE(review): the auto-detect behaviour is implemented by the consumer of
    // this config, not in this file.
    #[builder(default = "0")]
    #[serde(default)]
    pub threads: usize,

    /// Include hidden files (starting with .). Defaults to `true`.
    #[builder(default = "true")]
    #[serde(default = "default_true")]
    pub include_hidden: bool,

    /// Compute content hashes during scan. Defaults to `false`.
    #[builder(default = "false")]
    #[serde(default)]
    pub compute_hashes: bool,

    /// Minimum file size to hash (skip tiny files). Defaults to `4096`.
    // NOTE(review): presumably measured in bytes — confirm against the hashing code.
    #[builder(default = "4096")]
    #[serde(default = "default_min_hash_size")]
    pub min_hash_size: u64,

    /// Compiled glob patterns for ignore matching, derived from `ignore_patterns`.
    ///
    /// Never serialized and has no builder setter, so it starts as `None` after
    /// `build()` or deserialization. It is only filled in by `compile_patterns`.
    #[serde(skip)]
    #[builder(setter(skip))]
    #[builder(default)]
    compiled_ignore: Option<GlobSet>,
}
67
/// Serde default for `ScanConfig::include_hidden`: a missing field deserializes
/// to `true`, matching the builder default.
fn default_true() -> bool {
    true
}
71
/// Serde default for `ScanConfig::min_hash_size`: a missing field deserializes
/// to `4096`, matching the builder default.
fn default_min_hash_size() -> u64 {
    4096
}
75
76impl ScanConfigBuilder {
77    fn validate(&self) -> Result<(), String> {
78        if let Some(ref root) = self.root {
79            if root.as_os_str().is_empty() {
80                return Err("Root path cannot be empty".to_string());
81            }
82        } else {
83            return Err("Root path is required".to_string());
84        }
85        Ok(())
86    }
87}
88
89impl ScanConfig {
90    /// Create a new scan config builder.
91    pub fn builder() -> ScanConfigBuilder {
92        ScanConfigBuilder::default()
93    }
94
95    /// Create a simple config for scanning a path.
96    pub fn new(root: impl Into<PathBuf>) -> Self {
97        let mut config = Self {
98            root: root.into(),
99            follow_symlinks: false,
100            cross_filesystems: false,
101            apparent_size: false,
102            max_depth: None,
103            ignore_patterns: Vec::new(),
104            threads: 0,
105            include_hidden: true,
106            compute_hashes: false,
107            min_hash_size: 4096,
108            compiled_ignore: None,
109        };
110        config.compile_patterns();
111        config
112    }
113
114    /// Compile ignore patterns into a `GlobSet` for efficient matching.
115    /// Call this after modifying `ignore_patterns`.
116    pub fn compile_patterns(&mut self) {
117        if self.ignore_patterns.is_empty() {
118            self.compiled_ignore = None;
119            return;
120        }
121        let mut builder = GlobSetBuilder::new();
122        for pattern in &self.ignore_patterns {
123            if let Ok(glob) = Glob::new(pattern) {
124                builder.add(glob);
125            }
126        }
127        self.compiled_ignore = builder.build().ok();
128    }
129
130    /// Check if a name should be ignored based on compiled glob patterns.
131    pub fn should_ignore(&self, name: &str) -> bool {
132        if let Some(ref globset) = self.compiled_ignore {
133            globset.is_match(name)
134        } else if !self.ignore_patterns.is_empty() {
135            // Fallback: compile on-the-fly (patterns not yet compiled)
136            self.ignore_patterns.iter().any(|p| {
137                Glob::new(p)
138                    .map(|g| g.compile_matcher().is_match(name))
139                    .unwrap_or(false)
140            })
141        } else {
142            false
143        }
144    }
145
146    /// Return the compiled `GlobSet` for ignore patterns, if any.
147    ///
148    /// This is the same set used internally by `should_ignore`. Useful when
149    /// callers need an owned, `Send + Sync` handle (e.g. for closures sent
150    /// to thread pools) without recompiling the patterns.
151    pub fn compiled_ignore_set(&self) -> Option<&GlobSet> {
152        self.compiled_ignore.as_ref()
153    }
154
155    /// Check if hidden files should be skipped.
156    #[inline]
157    pub fn should_skip_hidden(&self, name: &str) -> bool {
158        !self.include_hidden && name.starts_with('.')
159    }
160}
161
162impl Default for ScanConfig {
163    fn default() -> Self {
164        Self::new(".")
165    }
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a config rooted at `/test` with the given ignore patterns and
    /// compiles them, since `build()` alone leaves the glob set uncompiled.
    fn config_ignoring(patterns: &[&str]) -> ScanConfig {
        let owned: Vec<String> = patterns.iter().map(|p| (*p).to_owned()).collect();
        let mut cfg = ScanConfig::builder()
            .root("/test")
            .ignore_patterns(owned)
            .build()
            .unwrap();
        cfg.compile_patterns();
        cfg
    }

    #[test]
    fn test_config_builder() {
        let built = ScanConfig::builder()
            .root("/home/user")
            .threads(4usize)
            .follow_symlinks(true)
            .build()
            .unwrap();

        assert_eq!(built.root, PathBuf::from("/home/user"));
        assert_eq!(built.threads, 4);
        assert!(built.follow_symlinks);
    }

    #[test]
    fn test_config_simple() {
        let simple = ScanConfig::new("/home/user");

        assert_eq!(simple.root, PathBuf::from("/home/user"));
        assert!(!simple.follow_symlinks);
        assert_eq!(simple.threads, 0);
    }

    #[test]
    fn test_should_ignore_glob() {
        let cfg = config_ignoring(&["node_modules", "*.log", "**/*.tmp"]);

        for ignored in ["node_modules", "test.log", "cache.tmp"] {
            assert!(cfg.should_ignore(ignored));
        }
        for kept in ["src", "test.txt"] {
            assert!(!cfg.should_ignore(kept));
        }
    }

    #[test]
    fn test_should_ignore_prefix_glob() {
        let cfg = config_ignoring(&["build*"]);

        for ignored in ["build", "build-output"] {
            assert!(cfg.should_ignore(ignored));
        }
        assert!(!cfg.should_ignore("rebuild"));
    }

    #[test]
    fn test_should_skip_hidden() {
        let mut cfg = ScanConfig::new("/test");

        // Hidden entries are included by default, so nothing is skipped.
        assert!(!cfg.should_skip_hidden(".git"));

        // Once hidden entries are excluded, only dot-prefixed names are skipped.
        cfg.include_hidden = false;
        assert!(cfg.should_skip_hidden(".git"));
        assert!(!cfg.should_skip_hidden("src"));
    }
}