Skip to main content

difflore_core/domain/
glob_match.rs

1//! Shared file-pattern glob matcher (B8).
2//!
3//! Two call sites need the *same* "does this path satisfy a rule's
4//! JSON-encoded `file_patterns` glob list?" logic but with deliberately
5//! **opposite** error handling:
6//!
7//! * Rule retrieval (`context::retrieval::rules`) over-recalls: a parse
8//!   error or an unbuildable glob set must NOT silently drop a rule —
9//!   better to surface a maybe-irrelevant rule than to lose real signal
10//!   on a corrupt `file_patterns` blob.
11//! * Observation attribution (`cloud::observations::dedup`) drops: a
12//!   parse error means we cannot prove the rule applies to the touched
13//!   file, so the safe call for attribution is to NOT credit it.
14//!
15//! The matching algorithm is identical; only the error verdict differs.
16//! That divergence is now an explicit [`GlobErrorPolicy`] argument
17//! instead of two drifting copies.
18
19use globset::{Glob, GlobSetBuilder};
20
/// Verdict selector for pattern blobs that cannot be turned into a
/// usable glob set: malformed JSON, no parseable globs, or a
/// `GlobSet::build` failure.
///
/// Absent / empty / `[]` patterns are *not* errors. They denote a
/// "universal rule" and always match, whichever policy is chosen.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum GlobErrorPolicy {
    /// Favour recall: an unusable pattern blob counts as "matches".
    /// Rule retrieval uses this so a corrupt blob never hides a rule.
    OverRecall,
    /// Favour precision: an unusable pattern blob counts as "does not
    /// match". Observation attribution uses this so a rule is never
    /// credited unless it can be shown to apply.
    Drop,
}
35
36impl GlobErrorPolicy {
37    #[inline]
38    const fn verdict(self) -> bool {
39        match self {
40            Self::OverRecall => true,
41            Self::Drop => false,
42        }
43    }
44}
45
46/// Decide whether `path` is in scope for a rule whose `patterns_json` is
47/// a JSON array of glob strings (e.g. `["src/**/*.rs", "**/*.toml"]`).
48///
49/// Returns `true` when:
50/// * `patterns_json` is `None`, blank, or parses to an empty list
51///   (universal rule — always in scope), or
52/// * any glob in the list matches the normalised `path`.
53///
54/// On a recoverable failure (malformed JSON, zero parseable globs, or a
55/// `GlobSet` build error) the result is governed by `on_error` so the
56/// two call sites keep their intentional opposite behaviour.
57///
58/// `path` is normalised before matching: a leading `/` is stripped and
59/// `\` is rewritten to `/` so Windows-style paths agree with
60/// forward-slash globs.
61pub fn glob_match(patterns_json: Option<&str>, path: &str, on_error: GlobErrorPolicy) -> bool {
62    let Some(raw) = patterns_json.map(str::trim).filter(|s| !s.is_empty()) else {
63        return true;
64    };
65    let patterns: Vec<String> = match serde_json::from_str(raw) {
66        Ok(v) => v,
67        Err(_) => return on_error.verdict(),
68    };
69    if patterns.is_empty() {
70        return true;
71    }
72
73    let mut builder = GlobSetBuilder::new();
74    let mut added = false;
75    for pattern in &patterns {
76        if let Ok(glob) = Glob::new(pattern.trim()) {
77            builder.add(glob);
78            added = true;
79        }
80    }
81    if !added {
82        return on_error.verdict();
83    }
84    let Ok(set) = builder.build() else {
85        return on_error.verdict();
86    };
87
88    // Normalise: drop a leading slash and convert backslashes so
89    // Windows paths agree with Unix-style globs.
90    let normalised = path.trim_start_matches('/').replace('\\', "/");
91    set.is_match(&normalised)
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// Both policies, for assertions that must hold whichever is chosen.
    const POLICIES: [GlobErrorPolicy; 2] = [GlobErrorPolicy::OverRecall, GlobErrorPolicy::Drop];

    #[test]
    fn absent_or_empty_is_universal_under_either_policy() {
        // None, empty, whitespace-only and `[]` all mean "universal rule".
        let blobs = [None, Some(""), Some("   "), Some("[]")];
        for policy in POLICIES {
            for blob in blobs {
                assert!(
                    glob_match(blob, "src/lib.rs", policy),
                    "{blob:?} should be universal under {policy:?}"
                );
            }
        }
    }

    #[test]
    fn glob_match_basic_and_path_normalisation() {
        // (patterns blob, path, expected verdict)
        let cases = [
            (r#"["**/*.rs"]"#, "tokio/src/io/uring.rs", true),
            (r#"["**/*.rs"]"#, ".github/workflows/ci.yml", false),
            (r#"["tokio/src/io/**"]"#, "tokio/src/io/uring.rs", true),
            (r#"["tokio/src/io/**"]"#, "tokio/src/runtime/mod.rs", false),
            // Backslash + leading-slash normalisation.
            (r#"["tokio/src/io/**"]"#, "tokio\\src\\io\\uring.rs", true),
            (r#"["tokio/src/io/**"]"#, "/tokio/src/io/uring.rs", true),
        ];
        for policy in POLICIES {
            for (patterns, path, expected) in cases {
                assert_eq!(
                    glob_match(Some(patterns), path, policy),
                    expected,
                    "patterns={patterns} path={path} policy={policy:?}"
                );
            }
        }
    }

    #[test]
    fn malformed_blob_follows_policy() {
        // Malformed JSON, then a JSON object where an array is expected.
        for blob in ["not-json", "{}"] {
            assert!(
                glob_match(Some(blob), "any/path.rs", GlobErrorPolicy::OverRecall),
                "{blob:?} should match under OverRecall"
            );
            assert!(
                !glob_match(Some(blob), "any/path.rs", GlobErrorPolicy::Drop),
                "{blob:?} should not match under Drop"
            );
        }
    }
}