difflore_core/domain/glob_match.rs
1//! Shared file-pattern glob matcher (B8).
2//!
3//! Two call sites need the *same* "does this path satisfy a rule's
4//! JSON-encoded `file_patterns` glob list?" logic but with deliberately
5//! **opposite** error handling:
6//!
7//! * Rule retrieval (`context::retrieval::rules`) over-recalls: a parse
8//! error or an unbuildable glob set must NOT silently drop a rule —
9//! better to surface a maybe-irrelevant rule than to lose real signal
10//! on a corrupt `file_patterns` blob.
11//! * Observation attribution (`cloud::observations::dedup`) drops: a
12//! parse error means we cannot prove the rule applies to the touched
13//! file, so the safe call for attribution is to NOT credit it.
14//!
15//! The matching algorithm is identical; only the error verdict differs.
16//! That divergence is now an explicit [`GlobErrorPolicy`] argument
17//! instead of two drifting copies.
18
19use globset::{Glob, GlobSetBuilder};
20
/// Verdict selector for a pattern blob that cannot be turned into a
/// usable glob set: malformed JSON, a list in which no entry parses as a
/// glob, or a `GlobSet::build` failure.
///
/// Absent, empty, or `[]` patterns are *not* errors. They denote a
/// "universal rule" and match under either policy.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GlobErrorPolicy {
    /// An unusable pattern blob counts as "matches". Rule retrieval uses
    /// this so a corrupt blob never costs recall.
    OverRecall,
    /// An unusable pattern blob counts as "does not match". Observation
    /// attribution uses this so a rule is never credited when it cannot
    /// be proven to apply.
    Drop,
}
35
36impl GlobErrorPolicy {
37 #[inline]
38 const fn verdict(self) -> bool {
39 match self {
40 Self::OverRecall => true,
41 Self::Drop => false,
42 }
43 }
44}
45
46/// Decide whether `path` is in scope for a rule whose `patterns_json` is
47/// a JSON array of glob strings (e.g. `["src/**/*.rs", "**/*.toml"]`).
48///
49/// Returns `true` when:
50/// * `patterns_json` is `None`, blank, or parses to an empty list
51/// (universal rule — always in scope), or
52/// * any glob in the list matches the normalised `path`.
53///
54/// On a recoverable failure (malformed JSON, zero parseable globs, or a
55/// `GlobSet` build error) the result is governed by `on_error` so the
56/// two call sites keep their intentional opposite behaviour.
57///
58/// `path` is normalised before matching: a leading `/` is stripped and
59/// `\` is rewritten to `/` so Windows-style paths agree with
60/// forward-slash globs.
61pub fn glob_match(patterns_json: Option<&str>, path: &str, on_error: GlobErrorPolicy) -> bool {
62 let Some(raw) = patterns_json.map(str::trim).filter(|s| !s.is_empty()) else {
63 return true;
64 };
65 let patterns: Vec<String> = match serde_json::from_str(raw) {
66 Ok(v) => v,
67 Err(_) => return on_error.verdict(),
68 };
69 if patterns.is_empty() {
70 return true;
71 }
72
73 let mut builder = GlobSetBuilder::new();
74 let mut added = false;
75 for pattern in &patterns {
76 if let Ok(glob) = Glob::new(pattern.trim()) {
77 builder.add(glob);
78 added = true;
79 }
80 }
81 if !added {
82 return on_error.verdict();
83 }
84 let Ok(set) = builder.build() else {
85 return on_error.verdict();
86 };
87
88 // Normalise: drop a leading slash and convert backslashes so
89 // Windows paths agree with Unix-style globs.
90 let normalised = path.trim_start_matches('/').replace('\\', "/");
91 set.is_match(&normalised)
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// Every policy, for assertions that must hold regardless of the
    /// error verdict.
    const BOTH_POLICIES: [GlobErrorPolicy; 2] =
        [GlobErrorPolicy::OverRecall, GlobErrorPolicy::Drop];

    #[test]
    fn absent_or_empty_is_universal_under_either_policy() {
        let universal_blobs = [None, Some(""), Some(" "), Some("[]")];
        for policy in BOTH_POLICIES {
            for blob in universal_blobs {
                assert!(
                    glob_match(blob, "src/lib.rs", policy),
                    "blob {blob:?} should be universal under {policy:?}"
                );
            }
        }
    }

    #[test]
    fn glob_match_basic_and_path_normalisation() {
        const ANY_RS: &str = r#"["**/*.rs"]"#;
        const IO_TREE: &str = r#"["tokio/src/io/**"]"#;

        // (patterns blob, path, expected verdict)
        let cases = [
            (ANY_RS, "tokio/src/io/uring.rs", true),
            (ANY_RS, ".github/workflows/ci.yml", false),
            (IO_TREE, "tokio/src/io/uring.rs", true),
            (IO_TREE, "tokio/src/runtime/mod.rs", false),
            // Backslash + leading-slash normalisation.
            (IO_TREE, "tokio\\src\\io\\uring.rs", true),
            (IO_TREE, "/tokio/src/io/uring.rs", true),
        ];

        for policy in BOTH_POLICIES {
            for (patterns, path, expected) in cases {
                assert_eq!(
                    glob_match(Some(patterns), path, policy),
                    expected,
                    "patterns {patterns} vs path {path:?} under {policy:?}"
                );
            }
        }
    }

    #[test]
    fn malformed_blob_follows_policy() {
        // First blob is malformed JSON; the second is valid JSON but an
        // object rather than the expected array.
        for blob in ["not-json", "{}"] {
            assert!(glob_match(
                Some(blob),
                "any/path.rs",
                GlobErrorPolicy::OverRecall
            ));
            assert!(!glob_match(
                Some(blob),
                "any/path.rs",
                GlobErrorPolicy::Drop
            ));
        }
    }
}