fdf 0.9.2

A fast, multi-threaded filesystem search tool with regex/glob support and extremely pretty colours!
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
use crate::SearchConfigError;
use crate::filters::{FileTypeFilter, SizeFilter, TimeFilter};
use crate::fs::{DirEntry, FileType};
use crate::util::glob_to_regex;
use core::num::NonZeroU32;
use core::ops::Deref;
use core::time::Duration;
use regex::bytes::{Regex, RegexBuilder};
use std::time::UNIX_EPOCH;
use thread_local::ThreadLocal;

/// A compiled regex together with a per-thread cache of clones of it.
///
/// `is_match` never matches against `base` directly: each calling thread lazily
/// clones `base` into `local` on first use and matches against its own clone.
// NOTE(review): presumably this avoids contention on scratch state shared inside a
// single `Regex` when many threads match at once — confirm against the regex docs.
pub struct TLSRegex {
    /// The regex as originally compiled; the source for every per-thread clone.
    base: Regex,
    /// Per-thread clones of `base`, populated lazily by `is_match`.
    local: ThreadLocal<Regex>,
}

impl Clone for TLSRegex {
    /// Clones the base regex only.
    ///
    /// The copy starts with an empty per-thread cache; clones are re-created
    /// lazily the first time each thread matches against the new value.
    fn clone(&self) -> Self {
        let base = self.base.clone();
        let local = ThreadLocal::new();
        Self { base, local }
    }
}

impl core::fmt::Debug for TLSRegex {
    /// Prints the base regex only; the per-thread cache is omitted and the
    /// output is marked non-exhaustive to signal that.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut out = f.debug_struct("TLSRegex");
        out.field("base", &self.base);
        out.finish_non_exhaustive()
    }
}

impl TLSRegex {
    /// Wraps an already compiled regex; no per-thread clone exists yet.
    const fn new(regex: Regex) -> Self {
        Self {
            local: ThreadLocal::new(),
            base: regex,
        }
    }

    /// Returns true when `path` matches the regex.
    ///
    /// The calling thread's private clone is used, created from the base regex
    /// on that thread's first call.
    #[inline]
    pub fn is_match(&self, path: &[u8]) -> bool {
        let per_thread = self.local.get_or(|| self.base.clone());
        per_thread.is_match(path)
    }
}

/**
This struct holds the configuration for searching a filesystem via traversal

It includes the compiled pattern matchers (main pattern, additional `--and` patterns
and ignore patterns), the hidden-file, extension, size, type and time filters,
whether to match on file names only, the maximum search depth, whether to follow
symlinks, and whether to respect `.gitignore` files.
*/
#[derive(Clone, Debug)]
#[expect(clippy::struct_excessive_bools, reason = "It's a CLI tool.")]
pub struct SearchConfig {
    /**
    Regular expression pattern for matching file names or paths

    If `None`, matches all files (the constructor stores `None` for the patterns
    `.`, `.*` and the empty pattern instead of compiling them).
    When `file_name_only` is true, only matches against the base filename.
    Uses thread-local storage for efficient multi-threaded regex matching.
    */
    pub(crate) regex_match: Option<TLSRegex>,

    /// Additional required matchers added via `--and`.
    ///
    /// All compiled matchers in this list must match for a path to be accepted.
    /// Match-all patterns (`.`, `.*`, empty) are dropped by the constructor, so this
    /// list may be shorter than the list of patterns that was supplied.
    pub(crate) and_match: Vec<TLSRegex>,

    /**
    Whether to exclude hidden files and directories

    Hidden files are those whose names start with a dot (`.`).
    When true, these files are filtered out from results.
    */
    pub(crate) hide_hidden: bool,

    /**
    File extension to filter by (case-insensitive)

    If `Some`, only files with this extension are matched.
    The extension should not include the leading dot (e.g., `"txt"` not `".txt"`).
    The comparison ignores ASCII case only.
    */
    pub(crate) extension_match: Option<Box<[u8]>>,

    /**
    Whether regex matching applies only to filename vs full path

    If true, regular expressions match only against the file's base name.
    If false, regular expressions match against the full path.
    The constructor forces this to false when the main pattern or any `--and`
    pattern contains a `/`, since such a pattern can only match a full path.
    */
    pub(crate) file_name_only: bool,

    /**
    Maximum directory depth to search

    If `Some(n)`, limits traversal to `n` levels deep.
    If `None`, searches directories to unlimited depth.
    */
    pub(crate) depth: Option<NonZeroU32>,

    /**
    Whether to follow symbolic links during traversal

    If true, symbolic links are followed and their targets are processed.
    If false, symbolic links are treated as regular files.
    */
    pub(crate) follow_symlinks: bool,

    /**
    Filter based on file size constraints

    If `Some`, only files matching the size criteria are included.
    Supports minimum, maximum, and exact size matching.
    */
    pub(crate) size_filter: Option<SizeFilter>,

    /**
    Filter based on file type

    If `Some`, only files of the specified type are included.
    Can filter by file, directory, symlink, etc.
    */
    pub(crate) type_filter: Option<FileTypeFilter>,

    /**
    Filter based on file modification time

    If `Some`, only files matching the time criteria are included.
    Supports relative time ranges (e.g., "last 7 days").
    */
    pub(crate) time_filter: Option<TimeFilter>,

    /**
    Whether to respect `.gitignore` files during traversal.

    When true, entries ignored by inherited `.gitignore` rules are skipped.
    */
    pub(crate) respect_gitignore: bool,

    /// Compiled ignore matcher (`--ignore` + `--ignoreg`) backed by thread-local regex clones.
    ///
    /// All ignore patterns (glob ones converted to regex first) are joined into a single
    /// alternation, so a path is ignored when any one of them matches.
    /// `None` when no ignore pattern was supplied.
    pub(crate) ignore_match: Option<TLSRegex>,
}
impl SearchConfig {
    /**
    Constructor for `SearchConfig`

    Builds a regex matcher if a valid pattern is provided, otherwise stores None
    Returns an error if the regex compilation fails
    */
    #[expect(
        clippy::fn_params_excessive_bools,
        clippy::too_many_arguments,
        reason = "Internal convenience"
    )]
    pub(crate) fn new<ToStr: AsRef<str>>(
        pattern: Option<&ToStr>, // ultimately this is CLI internal only
        hide_hidden: bool,
        case_insensitive: bool,
        filenameonly: bool,
        extension_match: Option<Box<[u8]>>,
        depth: Option<NonZeroU32>,
        follow_symlinks: bool,
        size_filter: Option<SizeFilter>,
        type_filter: Option<FileTypeFilter>,
        time_filter: Option<TimeFilter>,
        use_glob: bool,
        and_patterns: Vec<String>,
        respect_gitignore: bool,
        ignore_patterns: Vec<String>,
        ignore_glob_patterns: Vec<String>,
    ) -> core::result::Result<Self, SearchConfigError> {
        let (file_nm, pattern_to_use) = if let Some(patt_ref) = pattern.as_ref() {
            let patt = patt_ref.as_ref();
            let file_name_only = if patt.contains('/') {
                false // Over ride because if it's got a slash, it's gotta be a full path
            } else {
                filenameonly
            };

            let pattern_to_use = if use_glob {
                glob_to_regex(patt).map_err(SearchConfigError::GlobToRegexError)?
            } else {
                patt.into()
            };
            (file_name_only, pattern_to_use)
        } else {
            // No pattern provided, use match-all pattern
            (filenameonly, ".*".into())
        };

        // If pattern is "." or empty, we do not filter by regex, this avoids building a regex (even if its trivial cost)
        let regex_match =
            if pattern_to_use == "." || pattern_to_use == ".*" || pattern_to_use.is_empty() {
                None
            } else {
                let reg = RegexBuilder::new(&pattern_to_use)
                    .case_insensitive(case_insensitive)
                    .dot_matches_new_line(false)
                    .build();

                if let Err(regerror) = reg {
                    return Err(SearchConfigError::RegexError(regerror));
                }
                reg.ok().map(TLSRegex::new)
            };

        let mut and_match = Vec::with_capacity(and_patterns.len());
        let mut file_name_only = file_nm;
        for patt in and_patterns {
            if patt.contains('/') {
                file_name_only = false;
            }

            let f_pattern = if use_glob {
                glob_to_regex(&patt).map_err(SearchConfigError::GlobToRegexError)?
            } else {
                patt
            };

            if f_pattern == "." || f_pattern == ".*" || f_pattern.is_empty() {
                continue;
            }

            let reg = RegexBuilder::new(&f_pattern)
                .case_insensitive(case_insensitive)
                .dot_matches_new_line(false)
                .build()
                .map_err(SearchConfigError::RegexError)?;
            and_match.push(TLSRegex::new(reg));
        }

        let mut ignore_patterns_merged =
            Vec::with_capacity(ignore_patterns.len() + ignore_glob_patterns.len());
        ignore_patterns_merged.extend(ignore_patterns);

        for glob_pattern in ignore_glob_patterns {
            let regex_pattern =
                glob_to_regex(&glob_pattern).map_err(SearchConfigError::GlobToRegexError)?;
            ignore_patterns_merged.push(regex_pattern);
        }

        let ignore_match = if ignore_patterns_merged.is_empty() {
            None
        } else {
            let combined = ignore_patterns_merged
                .iter()
                .map(|patt| format!("(?:{patt})"))
                .collect::<Vec<_>>()
                .join("|");

            let reg = RegexBuilder::new(&combined)
                .case_insensitive(case_insensitive)
                .dot_matches_new_line(false)
                .build()
                .map_err(SearchConfigError::RegexError)?;
            Some(TLSRegex::new(reg))
        };

        Ok(Self {
            regex_match,
            and_match,
            hide_hidden,
            extension_match,
            file_name_only,
            depth,
            follow_symlinks,
            size_filter,
            type_filter,
            time_filter,
            respect_gitignore,
            ignore_match,
        })
    }

    /// Tells whether `path` is matched by the combined ignore matcher.
    ///
    /// Always false when no ignore pattern was configured.
    #[inline]
    #[must_use]
    pub fn matches_ignore_path(&self, path: &[u8]) -> bool {
        match &self.ignore_match {
            Some(ignore) => ignore.is_match(path),
            None => false,
        }
    }

    /// Evaluates a custom predicate function against a path
    ///
    /// Returns exactly what `predicate(path)` returns. No field of the configuration
    /// is consulted; `self` is only part of the signature.
    #[inline]
    #[must_use]
    pub fn matches_with<F: Fn(&[u8]) -> bool>(&self, path: &[u8], predicate: F) -> bool {
        predicate(path)
    }

    /// Checks whether a file name ends in `.` followed by the configured extension.
    ///
    /// Returns true when no extension filter is configured. The comparison ignores
    /// ASCII case.
    ///
    /// The entry is expected to be a filename (no slashes) of length >= 1; both are
    /// checked in debug builds. An empty name never matches (previously that case
    /// performed an unchecked read in release builds).
    #[inline]
    pub fn matches_extension<S>(&self, entry: &S) -> bool
    where
        S: Deref<Target = [u8]>,
    {
        debug_assert!(
            !entry.contains(&b'/'),
            "the filename contains a slash, some arithmetic has gone wrong somewhere!"
        ); // Ensure that the entry is a file name and not a path (internal invariant for my own arithmetic)

        debug_assert!(
            entry.len() >= 1,
            "internal invariant should always have length>=1 in matches extension"
        );

        self.extension_match.as_deref().is_none_or(|ext| {
            let name: &[u8] = entry;
            // Saturating arithmetic keeps both positions inside the name without an
            // explicit length comparison:
            // - name shorter than ext: the suffix is the whole name, hence shorter than
            //   ext, and the comparison below is false;
            // - name as long as ext: both positions are 0, so the "dot" is the first
            //   byte of the suffix itself, which can only match if ext starts with a dot.
            let suffix_start = name.len().saturating_sub(ext.len());
            let dot_idx = suffix_start.saturating_sub(1);

            // `suffix_start <= name.len()`, so the range lookup always succeeds.
            // `dot_idx` is only out of range for an empty name, which then yields false
            // instead of reading out of bounds.
            name.get(dot_idx).is_some_and(|&byte| byte == b'.')
                && name
                    .get(suffix_start..)
                    .is_some_and(|suffix| suffix.eq_ignore_ascii_case(ext))
        })
    }

    /**
    Checks a directory entry against the configured size filter.

    With no filter configured every entry passes.
    Regular files are checked using their own size.
    Symlinks pass only if they exist, can be statted, the stat result describes a
    regular file, and that size satisfies the filter.
    Every other file type, and any entry whose size cannot be read, fails.
    */
    #[inline]
    #[must_use]
    #[allow(clippy::cast_sign_loss)] // Sign loss does not matter here
    pub fn matches_size(&self, entry: &DirEntry) -> bool {
        let Some(limit) = self.size_filter else {
            return true; // nothing to filter on
        };

        // Work out the size to test first (if there is one), then test it once.
        let measured = match entry.file_type {
            FileType::RegularFile => entry.file_size().ok(),
            // Existence is checked before stat is called.
            FileType::Symlink if entry.exists() => entry
                .get_stat()
                .ok()
                .filter(|target| FileType::from_stat(target) == FileType::RegularFile)
                .map(|target| target.st_size as _),
            _ => None,
        };

        measured.is_some_and(|bytes| limit.is_within_size(bytes))
    }

    /// Checks a directory entry against the configured `FileTypeFilter`.
    ///
    /// With no filter configured every entry passes; otherwise the entry must
    /// satisfy the predicate that corresponds to the selected variant.
    #[inline]
    #[must_use]
    pub fn matches_type(&self, entry: &DirEntry) -> bool {
        self.type_filter.is_none_or(|wanted| match wanted {
            FileTypeFilter::File => entry.is_regular_file(),
            FileTypeFilter::Directory => entry.is_dir(),
            FileTypeFilter::Symlink => entry.is_symlink(),
            FileTypeFilter::Pipe => entry.is_pipe(),
            FileTypeFilter::CharDevice => entry.is_char_device(),
            FileTypeFilter::BlockDevice => entry.is_block_device(),
            FileTypeFilter::Socket => entry.is_socket(),
            FileTypeFilter::Unknown => entry.is_unknown(),
            FileTypeFilter::Executable => entry.is_executable(),
            FileTypeFilter::Empty => entry.is_empty(),
        })
    }

    /// Applies time-based filtering to files based on modification time
    ///
    /// Returns true if no time filter is configured, or if the file's modification
    /// time matches the filter criteria. Returns false when the modification time
    /// cannot be read or cannot be represented as a `SystemTime`.
    #[inline]
    #[must_use]
    pub fn matches_time(&self, entry: &DirEntry) -> bool {
        let Some(time_filter) = self.time_filter else {
            return true; // No filter means always match
        };

        // Get the modification time from the file and convert to SystemTime
        entry
            .modified_time()
            .ok()
            .and_then(|datetime| datetime.timestamp_nanos_opt())
            .and_then(|nanos| {
                // The timestamp is signed: a negative value is a time *before* the
                // epoch. Reinterpreting it as unsigned would turn it into a huge
                // positive offset (centuries in the future), so apply the magnitude
                // in the direction given by the sign instead.
                let offset = Duration::from_nanos(nanos.unsigned_abs());
                if nanos.is_negative() {
                    UNIX_EPOCH.checked_sub(offset)
                } else {
                    UNIX_EPOCH.checked_add(offset)
                }
            })
            .is_some_and(|systime| time_filter.matches_time(systime))
    }

    /// Tests an entry against the main pattern and every `--and` pattern.
    ///
    /// When `full_path` is true the whole path is the candidate; otherwise only the
    /// bytes from the entry's file-name index onwards are. A missing main pattern
    /// counts as a match, and all `--and` matchers must match as well.
    #[inline]
    #[must_use]
    pub fn matches_path(&self, dir: &DirEntry, full_path: bool) -> bool {
        // Multiply by 0 or 1 rather than branching on `full_path`.
        let skip = usize::from(!full_path) * dir.file_name_index();

        // SAFETY: `skip` is either 0 or the entry's file-name index.
        // NOTE(review): this relies on `file_name_index()` never exceeding the entry's
        // byte length — that invariant is maintained by `DirEntry`, not visible here.
        let haystack = unsafe { dir.get_unchecked(skip..) };

        if let Some(primary) = &self.regex_match {
            if !primary.is_match(haystack) {
                return false;
            }
        }

        self.and_match
            .iter()
            .all(|extra| extra.is_match(haystack))
    }
}