Skip to main content

recoco_core/ops/sources/shared/
pattern_matcher.rs

1// ReCoco is a Rust-only fork of CocoIndex, by [CocoIndex](https://CocoIndex)
2// Original code from CocoIndex is copyrighted by CocoIndex
3// SPDX-FileCopyrightText: 2025-2026 CocoIndex (upstream)
4// SPDX-FileContributor: CocoIndex Contributors
5//
6// All modifications from the upstream for ReCoco are copyrighted by Knitli Inc.
7// SPDX-FileCopyrightText: 2026 Knitli Inc. (ReCoco)
8// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
9//
10// Both the upstream CocoIndex code and the ReCoco modifications are licensed under the Apache-2.0 License.
11// SPDX-License-Identifier: Apache-2.0
12
13use crate::ops::sdk::*;
14use globset::{Glob, GlobSet, GlobSetBuilder};
15
16/// Builds a GlobSet from a vector of pattern strings
17fn build_glob_set(patterns: Vec<String>) -> Result<GlobSet> {
18    let mut builder = GlobSetBuilder::new();
19    for pattern in patterns {
20        builder.add(Glob::new(pattern.as_str())?);
21    }
22    Ok(builder.build()?)
23}
24
25/// Pattern matcher that handles include and exclude patterns for files
26#[derive(Debug)]
27pub struct PatternMatcher {
28    /// Patterns matching full path of files to be included.
29    included_glob_set: Option<GlobSet>,
30    /// Patterns matching full path of files and directories to be excluded.
31    /// If a directory is excluded, all files and subdirectories within it are also excluded.
32    excluded_glob_set: Option<GlobSet>,
33}
34
35impl PatternMatcher {
36    /// Create a new PatternMatcher from optional include and exclude pattern vectors
37    pub fn new(
38        included_patterns: Option<Vec<String>>,
39        excluded_patterns: Option<Vec<String>>,
40    ) -> Result<Self> {
41        let included_glob_set = included_patterns.map(build_glob_set).transpose()?;
42        let excluded_glob_set = excluded_patterns.map(build_glob_set).transpose()?;
43
44        Ok(Self {
45            included_glob_set,
46            excluded_glob_set,
47        })
48    }
49
50    /// Check if a file or directory is excluded by the exclude patterns
51    /// Can be called on directories to prune traversal on excluded directories.
52    pub fn is_excluded(&self, path: &str) -> bool {
53        self.excluded_glob_set
54            .as_ref()
55            .is_some_and(|glob_set| glob_set.is_match(path))
56    }
57
58    /// Check if a file should be included based on both include and exclude patterns
59    /// Should be called for each file.
60    pub fn is_file_included(&self, path: &str) -> bool {
61        self.included_glob_set
62            .as_ref()
63            .is_none_or(|glob_set| glob_set.is_match(path))
64            && !self.is_excluded(path)
65    }
66}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns a slice of string literals into the owned pattern list that
    /// `PatternMatcher::new` expects.
    fn globs(patterns: &[&str]) -> Vec<String> {
        patterns.iter().map(|p| p.to_string()).collect()
    }

    #[test]
    fn test_pattern_matcher_no_patterns() {
        // With neither filter configured, everything is included and nothing is excluded.
        let unfiltered = PatternMatcher::new(None, None).unwrap();

        assert!(unfiltered.is_file_included("test.txt"));
        assert!(unfiltered.is_file_included("path/to/file.rs"));
        assert!(!unfiltered.is_excluded("anything"));
    }

    #[test]
    fn test_pattern_matcher_include_only() {
        let include = globs(&["*.txt", "*.rs"]);
        let matcher = PatternMatcher::new(Some(include), None).unwrap();

        assert!(matcher.is_file_included("test.txt"));
        assert!(matcher.is_file_included("main.rs"));
        assert!(!matcher.is_file_included("image.png"));
    }

    #[test]
    fn test_pattern_matcher_exclude_only() {
        let exclude = globs(&["*.tmp", "*.log"]);
        let matcher = PatternMatcher::new(None, Some(exclude)).unwrap();

        assert!(matcher.is_file_included("test.txt"));
        assert!(!matcher.is_file_included("temp.tmp"));
        assert!(!matcher.is_file_included("debug.log"));
    }

    #[test]
    fn test_pattern_matcher_both_patterns() {
        let include = globs(&["*.txt"]);
        let exclude = globs(&["*temp*"]);
        let matcher = PatternMatcher::new(Some(include), Some(exclude)).unwrap();

        assert!(matcher.is_file_included("test.txt"));
        // Matches the include pattern but is rejected by the exclude pattern.
        assert!(!matcher.is_file_included("temp.txt"));
        // Never matches the include pattern in the first place.
        assert!(!matcher.is_file_included("main.rs"));
    }
}