1use std::path::{Path, PathBuf};
5
6use rayon::prelude::*;
7use serde::{Deserialize, Serialize};
8
9use super::{DetectorFile, DetectorSpec, PatternSpec, QualityIssue, SpecError, validate_detector};
10
/// Schema version stamped into every detector cache written by `save_detector_cache`.
///
/// `load_detector_cache` ignores any cache file whose stored version differs from
/// this value.
const DETECTOR_CACHE_VERSION: u32 = 2;
12
/// Layout of the detector cache as it is serialized to and deserialized from JSON.
#[derive(Serialize, Deserialize)]
struct DetectorCacheFile {
    /// Cache schema version; compared against `DETECTOR_CACHE_VERSION` on load.
    version: u32,
    /// The cached detector specifications.
    detectors: Vec<DetectorSpec>,
}
18
19pub fn save_detector_cache(
31 detectors: &[DetectorSpec],
32 cache_path: &Path,
33) -> Result<(), std::io::Error> {
34 let json = serde_json::to_vec(&DetectorCacheFile {
35 version: DETECTOR_CACHE_VERSION,
36 detectors: detectors.to_vec(),
37 })?;
38 std::fs::write(cache_path, json)
39}
40
41pub fn load_detector_cache(cache_path: &Path, source_dir: &Path) -> Option<Vec<DetectorSpec>> {
61 let cache_meta = std::fs::metadata(cache_path).ok()?;
62 let cache_mtime = cache_meta.modified().ok()?;
63
64 let entries = std::fs::read_dir(source_dir).ok()?;
66 for entry in entries.flatten() {
67 let path = entry.path();
68 if path.extension().is_some_and(|ext| ext == "toml") {
69 let is_stale = std::fs::metadata(&path)
70 .and_then(|meta| meta.modified())
71 .is_ok_and(|mtime| mtime > cache_mtime);
72
73 if is_stale {
74 return None; }
76 }
77 }
78
79 let data = std::fs::read(cache_path).ok()?;
80 let cache: DetectorCacheFile = serde_json::from_slice(&data).ok()?;
81 if cache.version != DETECTOR_CACHE_VERSION {
82 return None;
83 }
84
85 let validated: Vec<DetectorSpec> = cache
87 .detectors
88 .into_iter()
89 .filter(|spec| {
90 let issues = validate_detector(spec);
91 let has_errors = issues
92 .iter()
93 .any(|issue| matches!(issue, QualityIssue::Error(_)));
94 if has_errors {
95 tracing::warn!(
96 "cached detector '{}' failed quality gate, discarding",
97 spec.id
98 );
99 }
100 !has_errors
101 })
102 .collect();
103
104 if validated.is_empty() {
105 tracing::warn!("all cached detectors failed validation, falling back to TOML load");
106 return None;
107 }
108
109 Some(validated)
110}
111
/// Loads detector specs from `dir` with the quality gate enforced.
///
/// This is a convenience wrapper that calls `load_detectors_with_gate(dir, true)`
/// and returns its result unchanged, including any `SpecError`.
pub fn load_detectors(dir: &Path) -> Result<Vec<DetectorSpec>, SpecError> {
    load_detectors_with_gate(dir, true)
}
127
128pub fn load_detectors_with_gate(
140 dir: &Path,
141 enforce_gate: bool,
142) -> Result<Vec<DetectorSpec>, SpecError> {
143 let entries = std::fs::read_dir(dir).map_err(|e| SpecError::ReadFile {
145 path: dir.display().to_string(),
146 source: e,
147 })?;
148 let toml_paths: Vec<PathBuf> = entries
149 .filter_map(|entry| {
150 let entry = entry.ok()?;
151 let path = entry.path();
152 if path.extension().is_some_and(|ext| ext == "toml") {
153 Some(path)
154 } else {
155 None
156 }
157 })
158 .collect();
159
160 let parsed: Vec<Option<DetectorSpec>> = toml_paths
162 .par_iter()
163 .map(|path| {
164 let mut skipped = 0;
165 let mut errors = Vec::new();
166 read_detector_file(path, &mut skipped, &mut errors)
167 })
168 .collect();
169
170 let mut load_state = DetectorLoadState::default();
172 let mut detectors = Vec::with_capacity(parsed.len());
173
174 for spec in parsed.into_iter().flatten() {
175 if should_reject_detector(
176 &spec,
177 enforce_gate,
178 &mut load_state.gate_rejected,
179 &mut load_state.total_warnings,
180 ) {
181 continue;
182 }
183 detectors.push(spec);
184 }
185
186 if should_inject_github_classic_pat_detector(&detectors) {
187 inject_github_classic_pat_detector(&mut detectors);
188 }
189
190 log_load_summary(&load_state);
191
192 detectors.sort_by(|a, b| a.id.cmp(&b.id));
193 Ok(detectors)
194}
195
/// Counters and messages gathered during a detector load, reported by
/// `log_load_summary`.
#[derive(Default)]
struct DetectorLoadState {
    /// Number of files reported as skipped ("unparseable") in the summary.
    skipped: usize,
    /// Human-readable load problems, logged one line each in the summary.
    load_errors: Vec<String>,
    /// Number of detectors rejected by the quality gate.
    gate_rejected: usize,
    /// Total number of quality-gate warnings counted across detectors.
    total_warnings: usize,
}
203
204fn log_load_summary(state: &DetectorLoadState) {
205 if state.skipped > 0 {
206 tracing::info!("skipped {} unparseable files", state.skipped);
207 }
208 for error in &state.load_errors {
209 tracing::info!("detector load issue: {error}");
210 }
211 if state.gate_rejected > 0 {
212 tracing::info!("quality gate: rejected {} detectors", state.gate_rejected);
213 }
214 if state.total_warnings > 0 {
215 tracing::debug!("quality gate: {} warnings", state.total_warnings);
216 }
217}
218
219fn read_detector_file(
220 path: &Path,
221 skipped: &mut usize,
222 load_errors: &mut Vec<String>,
223) -> Option<DetectorSpec> {
224 let contents = match std::fs::read_to_string(path) {
225 Ok(contents) => contents,
226 Err(error) => {
227 let message = format!("failed to read {}: {}", path.display(), error);
228 tracing::debug!("{message}");
229 load_errors.push(message);
230 *skipped += 1;
231 return None;
232 }
233 };
234
235 match toml::from_str::<DetectorFile>(&contents) {
236 Ok(file) => Some(file.detector),
237 Err(error) => {
238 let message = format!("failed to parse {}: {}", path.display(), error);
239 tracing::debug!("{message}");
240 load_errors.push(message);
241 *skipped += 1;
242 None
243 }
244 }
245}
246
247fn should_reject_detector(
248 spec: &DetectorSpec,
249 enforce_gate: bool,
250 gate_rejected: &mut usize,
251 total_warnings: &mut usize,
252) -> bool {
253 let mut has_errors = false;
254 for issue in validate_detector(spec) {
255 match issue {
256 QualityIssue::Warning(warning) => {
257 tracing::debug!("quality: {} — {}", spec.id, warning);
258 *total_warnings += 1;
259 }
260 QualityIssue::Error(error) => {
261 tracing::warn!("failed to validate detector: {}: {}", spec.id, error);
262 has_errors = true;
263 }
264 }
265 }
266
267 if has_errors && enforce_gate {
268 *gate_rejected += 1;
269 return true;
270 }
271
272 false
273}
274
275pub(super) fn inject_github_classic_pat_detector(detectors: &mut Vec<DetectorSpec>) {
276 let Some(github_fine_grained) = detectors
277 .iter()
278 .find(|d| d.id == "github-pat-fine-grained")
279 .cloned()
280 else {
281 return;
282 };
283
284 let mut compat = github_fine_grained;
285 compat.id = "github-classic-pat".into();
286 compat.name = "GitHub Classic PAT".into();
287 compat.keywords = vec!["ghp_".into(), "github".into()];
288 compat.patterns = vec![PatternSpec {
289 regex: "ghp_[a-zA-Z0-9]{36,40}".into(),
290 description: Some("GitHub classic personal access token".into()),
291 group: None,
292 }];
293
294 detectors.push(compat);
295}
296
297fn should_inject_github_classic_pat_detector(detectors: &[DetectorSpec]) -> bool {
298 !detectors.iter().any(|d| d.id == "github-classic-pat")
299 && detectors.iter().any(|d| d.id == "github-pat-fine-grained")
300}