1use uuid::Uuid;
13
14use crate::store::record::{
15 Category, ConfidenceScore, GotchaRecord, Priority, QualityScore, QualityTier, Record,
16 RecordSource,
17};
18
/// Marker phrases that [`scan_markers`] searches for in each line.
///
/// Entries are written in upper case because the scanner upper-cases a line before
/// testing it with `contains`. Order matters: the scanner reports the first entry in
/// this list that matches, and at most one marker per line.
const MARKERS: &[&str] = &[
    "DO NOT REMOVE",
    "DO NOT EDIT",
    "DO NOT MODIFY",
    "DO NOT DELETE",
    "SECURITY:",
    "SECURITY-CRITICAL",
];
29
/// Longest line, in bytes (`str::len`), that [`scan_markers`] will inspect.
///
/// Lines strictly longer than this are skipped; a line of exactly this length is
/// still scanned. Presumably this keeps minified/generated content out of the
/// results — confirm the intent with the author.
const MAX_LINE_LEN: usize = 400;
32
/// Upper bound on how many marker hits [`marker_candidates`] turns into records.
///
/// Hits beyond the first `MAX_MARKER_CANDIDATES` (in input order) are dropped.
pub const MAX_MARKER_CANDIDATES: usize = 200;
35
/// One ownership rule read from a CODEOWNERS file: a path pattern and its owners.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OwnerRule {
    /// The first whitespace-separated token of the line, stored verbatim.
    pub pattern: String,
    /// The remaining tokens of the line, in file order. Rules produced by
    /// [`parse_codeowners`] always have at least one owner.
    pub owners: Vec<String>,
}
44
45pub fn parse_codeowners(content: &str) -> Vec<OwnerRule> {
48 let mut rules = Vec::new();
49 for raw in content.lines() {
50 let line = raw.split('#').next().unwrap_or("").trim();
51 if line.is_empty() {
52 continue;
53 }
54 let mut parts = line.split_whitespace();
55 let Some(pattern) = parts.next() else {
56 continue;
57 };
58 let owners: Vec<String> = parts.map(str::to_string).collect();
59 if owners.is_empty() {
60 continue;
61 }
62 rules.push(OwnerRule {
63 pattern: pattern.to_string(),
64 owners,
65 });
66 }
67 rules
68}
69
/// A single line in a file that contains one of the [`MARKERS`] phrases.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkerHit {
    /// Path of the scanned file, exactly as passed to [`scan_markers`].
    pub path: String,
    /// 1-based line number of the hit within the file.
    pub line: usize,
    /// The matching entry from [`MARKERS`] (upper case).
    pub marker: String,
    /// The matching line with surrounding whitespace trimmed, original casing kept.
    pub text: String,
}
80
81pub fn scan_markers(path: &str, content: &str) -> Vec<MarkerHit> {
83 let mut hits = Vec::new();
84 for (i, raw) in content.lines().enumerate() {
85 if raw.len() > MAX_LINE_LEN {
86 continue;
87 }
88 let upper = raw.to_uppercase();
89 if let Some(marker) = MARKERS.iter().find(|m| upper.contains(**m)) {
90 hits.push(MarkerHit {
91 path: path.to_string(),
92 line: i + 1,
93 marker: (*marker).to_string(),
94 text: raw.trim().to_string(),
95 });
96 }
97 }
98 hits
99}
100
101#[allow(clippy::too_many_arguments)]
106fn candidate_record(
107 key: String,
108 rule: String,
109 reason: String,
110 severity: Priority,
111 affected_files: Vec<String>,
112 tags: Vec<String>,
113 device_id: Uuid,
114 logical_clock: u64,
115 now: u64,
116) -> Record {
117 let gotcha = GotchaRecord {
118 rule: rule.clone(),
119 reason,
120 severity: severity.clone(),
121 affected_files,
122 ref_url: None,
123 discovered_session: now,
124 confirmed: false,
125 };
126 let mut rec = Record::layer0_file_stub(&key, device_id, logical_clock, now);
127 rec.category = Category::Gotcha;
128 rec.source = RecordSource::Import;
129 rec.priority = severity;
130 rec.value = rule;
131 rec.quality = QualityScore {
132 value: 0.50,
133 tier: QualityTier::Acceptable,
134 signals: vec![],
135 computed_at: now,
136 };
137 rec.confidence = ConfidenceScore::for_new_record(&RecordSource::Import);
140 rec.tags = tags;
141 rec.payload = serde_json::to_value(&gotcha).ok();
142 rec
143}
144
145pub fn codeowners_candidates(
147 rules: &[OwnerRule],
148 device_id: Uuid,
149 clock_start: u64,
150 now: u64,
151) -> Vec<Record> {
152 rules
153 .iter()
154 .enumerate()
155 .map(|(i, r)| {
156 let owners = r.owners.join(", ");
157 let rule = format!(
158 "`{}` is owned by {} (CODEOWNERS) — coordinate changes with them.",
159 r.pattern, owners
160 );
161 let reason = format!("Listed in CODEOWNERS: {} → {}.", r.pattern, owners);
162 let key = format!("gotcha:codeowners:{}", r.pattern);
163 candidate_record(
164 key,
165 rule,
166 reason,
167 Priority::Normal,
168 vec![r.pattern.clone()],
169 vec!["codeowners".into(), "auto-generated".into()],
170 device_id,
171 clock_start + i as u64,
172 now,
173 )
174 })
175 .collect()
176}
177
178pub fn marker_candidates(
180 hits: &[MarkerHit],
181 device_id: Uuid,
182 clock_start: u64,
183 now: u64,
184) -> Vec<Record> {
185 hits.iter()
186 .take(MAX_MARKER_CANDIDATES)
187 .enumerate()
188 .map(|(i, h)| {
189 let rule = format!(
190 "`{}` carries a `{}` marker at line {} — preserve it through edits.",
191 h.path, h.marker, h.line
192 );
193 let reason = format!("Developer marker in source: {}", h.text);
194 let key = format!("gotcha:marker:{}:{}", h.path, h.line);
195 candidate_record(
197 key,
198 rule,
199 reason,
200 Priority::High,
201 vec![h.path.clone()],
202 vec!["code-marker".into(), "auto-generated".into()],
203 device_id,
204 clock_start + i as u64,
205 now,
206 )
207 })
208 .collect()
209}
210
211pub fn build_candidates(
215 codeowners: Option<&str>,
216 files: &[(String, String)],
217 device_id: Uuid,
218 clock_start: u64,
219 now: u64,
220) -> Vec<Record> {
221 let mut out = Vec::new();
222 let mut clock = clock_start;
223
224 if let Some(content) = codeowners {
225 let rules = parse_codeowners(content);
226 let recs = codeowners_candidates(&rules, device_id, clock, now);
227 clock += recs.len() as u64;
228 out.extend(recs);
229 }
230
231 let mut hits = Vec::new();
232 for (path, content) in files {
233 hits.extend(scan_markers(path, content));
234 }
235 out.extend(marker_candidates(&hits, device_id, clock, now));
236
237 out
238}
239
/// Unit tests for CODEOWNERS parsing, marker scanning, and candidate construction.
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed device id so records are deterministic across tests.
    fn dev() -> Uuid {
        Uuid::nil()
    }

    /// True when `rec` is an imported gotcha whose payload deserializes to a
    /// `GotchaRecord` with `confirmed == false`.
    fn is_unconfirmed_gotcha(rec: &Record) -> bool {
        rec.category == Category::Gotcha
            && rec.source == RecordSource::Import
            && rec
                .payload
                .as_ref()
                .and_then(|p| serde_json::from_value::<GotchaRecord>(p.clone()).ok())
                .is_some_and(|g| !g.confirmed)
    }

    // Comment lines, blank lines, and a pattern whose only "owner" is a trailing
    // comment must all be dropped; owners keep their file order.
    #[test]
    fn parse_codeowners_ignores_comments_and_blank_and_ownerless() {
        let content = "\
# comment\n\
\n\
src/payments/** @pay-team @alice\n\
docs/ # trailing comment\n\
*.rs @rustfolk\n";
        let rules = parse_codeowners(content);
        assert_eq!(rules.len(), 2, "ownerless `docs/` line is skipped");
        assert_eq!(rules[0].pattern, "src/payments/**");
        assert_eq!(rules[0].owners, vec!["@pay-team", "@alice"]);
        assert_eq!(rules[1].pattern, "*.rs");
    }

    // A lower-case marker still matches and is reported in its canonical upper-case
    // form with a 1-based line number; a line over MAX_LINE_LEN is not scanned.
    #[test]
    fn scan_markers_is_case_insensitive_and_skips_long_lines() {
        let content = "\
let x = 1;\n\
// do not remove: load-bearing init order\n\
// SECURITY: validate before deref\n\
let normal = 2;\n";
        let hits = scan_markers("src/lib.rs", content);
        assert_eq!(hits.len(), 2);
        assert_eq!(hits[0].marker, "DO NOT REMOVE");
        assert_eq!(hits[0].line, 2);
        assert_eq!(hits[1].marker, "SECURITY:");

        // The prefix plus MAX_LINE_LEN filler characters is strictly over the limit.
        let long = format!("// DO NOT REMOVE {}", "x".repeat(MAX_LINE_LEN));
        assert!(scan_markers("min.js", &long).is_empty());
    }

    // The record key embeds the pattern and the payload lists the pattern as the
    // sole affected file.
    #[test]
    fn codeowners_candidates_are_unconfirmed_gotchas_keyed_by_pattern() {
        let rules = parse_codeowners("src/payments/** @pay-team\n");
        let recs = codeowners_candidates(&rules, dev(), 0, 100);
        assert_eq!(recs.len(), 1);
        assert!(is_unconfirmed_gotcha(&recs[0]));
        assert_eq!(recs[0].key, "gotcha:codeowners:src/payments/**");
        let g: GotchaRecord = serde_json::from_value(recs[0].payload.clone().unwrap()).unwrap();
        assert_eq!(g.affected_files, vec!["src/payments/**"]);
        assert!(!g.confirmed);
    }

    // Feeding 50 more hits than the cap must yield exactly MAX_MARKER_CANDIDATES
    // records, keyed by `<path>:<line>` and marked high priority.
    #[test]
    fn marker_candidates_cap_and_key_format() {
        let hits: Vec<MarkerHit> = (0..MAX_MARKER_CANDIDATES + 50)
            .map(|i| MarkerHit {
                path: format!("src/f{i}.rs"),
                line: i + 1,
                marker: "DO NOT REMOVE".into(),
                text: "// DO NOT REMOVE".into(),
            })
            .collect();
        let recs = marker_candidates(&hits, dev(), 0, 100);
        assert_eq!(recs.len(), MAX_MARKER_CANDIDATES, "capped");
        assert_eq!(recs[0].key, "gotcha:marker:src/f0.rs:1");
        assert_eq!(recs[0].priority, Priority::High);
        assert!(is_unconfirmed_gotcha(&recs[0]));
    }

    // One CODEOWNERS rule plus one marker hit gives two records, one of each kind,
    // and their logical clocks must not collide.
    #[test]
    fn build_candidates_combines_both_sources() {
        let files = vec![(
            "src/auth.rs".to_string(),
            "// SECURITY: constant-time compare\n".to_string(),
        )];
        let recs = build_candidates(Some("src/** @team\n"), &files, dev(), 0, 100);
        assert_eq!(recs.len(), 2);
        assert!(recs.iter().all(is_unconfirmed_gotcha));
        assert!(recs.iter().any(|r| r.key.starts_with("gotcha:codeowners:")));
        assert!(recs.iter().any(|r| r.key.starts_with("gotcha:marker:")));
        let clocks: std::collections::HashSet<u64> =
            recs.iter().map(|r| r.version.logical_clock).collect();
        assert_eq!(clocks.len(), recs.len());
    }
}