use regex::Regex;
use serde::Serialize;
use std::collections::HashSet;
use std::sync::OnceLock;
9
10#[derive(Debug, Clone, Serialize)]
12pub struct ConfigQualityResult {
13 pub score: f64,
15 pub has_claude_md: bool,
17 pub has_claude_instructions: bool,
19 pub has_readme: bool,
21 pub details: Vec<ConfigDetail>,
23 #[serde(default)]
25 pub llm_adjusted: bool,
26}
27
28#[derive(Debug, Clone, Serialize)]
29pub struct ConfigDetail {
30 pub file: String,
32 pub length_score: f64,
34 pub structure_score: f64,
36 pub specificity_score: f64,
38 pub actionable_score: f64,
40 pub file_refs_score: f64,
42 pub shell_commands_score: f64,
44 pub recency_score: f64,
46 #[serde(skip_serializing_if = "Option::is_none")]
48 pub llm_quality_score: Option<f64>,
49}
50
/// A configuration/documentation file handed to the analyzer.
///
/// Derives `Debug` and `Clone` so callers can log and duplicate inputs.
#[derive(Debug, Clone)]
pub struct ConfigFile {
    /// Path used to classify the file (`CLAUDE.md`, `.claude/instructions`, `README.*`).
    // NOTE(review): presumably relative to the repository root — confirm against callers.
    pub relative_path: String,
    /// Full text of the file.
    pub content: String,
    /// Age of the last modification in days; `None` yields a neutral recency score.
    pub days_since_modified: Option<i64>,
}
57
/// Stateless namespace for the configuration-quality scoring routines.
///
/// All functionality lives in associated functions; the derives only make the
/// marker type convenient to store, copy and print.
#[derive(Debug, Clone, Copy, Default)]
pub struct ConfigQualityAnalyzer;
59
/// Heuristically decides whether `content` is substantial documentation rather
/// than a stub or placeholder.
///
/// All of the following must hold:
/// 1. the text is at least 100 bytes long,
/// 2. it has at least 20 whitespace-separated words,
/// 3. at least one line begins with `#`,
/// 4. the Shannon entropy of its byte distribution is at least 2.5 bits.
pub fn is_meaningful_content(content: &str) -> bool {
    const MIN_BYTES: usize = 100;
    const MIN_WORDS: usize = 20;
    const MIN_ENTROPY_BITS: f64 = 2.5;

    let bytes = content.as_bytes();
    if bytes.len() < MIN_BYTES || content.split_whitespace().count() < MIN_WORDS {
        return false;
    }
    if !content.lines().any(|line| line.starts_with('#')) {
        return false;
    }

    // Histogram of byte values, used to estimate how varied the text is.
    let mut histogram = [0u32; 256];
    for &byte in bytes {
        histogram[usize::from(byte)] += 1;
    }

    let len = bytes.len() as f64;
    let mut entropy = 0.0_f64;
    for &count in histogram.iter() {
        if count == 0 {
            continue;
        }
        let p = f64::from(count) / len;
        entropy -= p * p.log2();
    }

    entropy >= MIN_ENTROPY_BITS
}
99
/// Expands a stub that merely points at an agents file.
///
/// `content` is treated as a pointer when its trimmed form is shorter than 50
/// bytes and mentions `AGENTS.md` or `agents.md`. If so, and `agents_content`
/// is available, the result is the agents text prefixed with an HTML comment
/// recording the original pointer. In every other case `content` is returned
/// unchanged (untrimmed).
pub fn resolve_agents_reference(content: &str, agents_content: Option<&str>) -> String {
    const MAX_REFERENCE_LEN: usize = 50;

    let pointer = content.trim();
    let mentions_agents = ["AGENTS.md", "agents.md"]
        .iter()
        .any(|name| pointer.contains(*name));

    match agents_content {
        Some(agents) if pointer.len() < MAX_REFERENCE_LEN && mentions_agents => {
            format!("<!-- Resolved from: {} -->\n{}", pointer, agents)
        }
        _ => content.to_string(),
    }
}
124
125impl ConfigQualityAnalyzer {
126 pub fn analyze(
131 config_files: &[ConfigFile],
132 known_files: &HashSet<String>,
133 ) -> ConfigQualityResult {
134 let has_claude_md = config_files.iter().any(|f| {
135 f.relative_path == "CLAUDE.md" || f.relative_path.ends_with("/CLAUDE.md")
136 });
137 let has_claude_instructions = config_files.iter().any(|f| {
138 f.relative_path == ".claude/instructions"
139 || f.relative_path.contains(".claude/instructions")
140 });
141 let has_readme = config_files.iter().any(|f| {
142 let name = f.relative_path.to_uppercase();
143 name == "README.MD" || name.starts_with("README.")
144 });
145
146 let claude_md_meaningful = config_files.iter().any(|f| {
148 (f.relative_path == "CLAUDE.md" || f.relative_path.ends_with("/CLAUDE.md"))
149 && is_meaningful_content(&f.content)
150 });
151 let claude_instructions_meaningful = config_files.iter().any(|f| {
152 (f.relative_path == ".claude/instructions"
153 || f.relative_path.contains(".claude/instructions"))
154 && is_meaningful_content(&f.content)
155 });
156 let readme_meaningful = config_files.iter().any(|f| {
157 let name = f.relative_path.to_uppercase();
158 (name == "README.MD" || name.starts_with("README."))
159 && is_meaningful_content(&f.content)
160 });
161
162 let existence_score = {
163 let mut s = 0.0;
164 if claude_md_meaningful { s += 0.4; }
165 if claude_instructions_meaningful { s += 0.3; }
166 if readme_meaningful { s += 0.3; }
167 s
168 };
169
170 if config_files.is_empty() {
171 return ConfigQualityResult {
172 score: 0.0,
173 has_claude_md,
174 has_claude_instructions,
175 has_readme,
176 details: vec![],
177 llm_adjusted: false,
178 };
179 }
180
181 let mut details = Vec::new();
182 let mut content_scores = Vec::new();
183
184 const W_LENGTH: f64 = 0.12;
189 const W_STRUCTURE: f64 = 0.13;
190 const W_SPECIFICITY: f64 = 0.20;
191 const W_ACTIONABLE: f64 = 0.18;
192 const W_FILE_REFS: f64 = 0.13;
193 const W_SHELL_COMMANDS: f64 = 0.14;
194 const W_RECENCY: f64 = 0.10;
195
196 for file in config_files {
197 let detail = Self::analyze_file(file, known_files);
198 let file_score = W_LENGTH * detail.length_score
199 + W_STRUCTURE * detail.structure_score
200 + W_SPECIFICITY * detail.specificity_score
201 + W_ACTIONABLE * detail.actionable_score
202 + W_FILE_REFS * detail.file_refs_score
203 + W_SHELL_COMMANDS * detail.shell_commands_score
204 + W_RECENCY * detail.recency_score;
205 content_scores.push(file_score);
206 details.push(detail);
207 }
208
209 let avg_content = content_scores.iter().sum::<f64>() / content_scores.len() as f64;
210
211 let score = 0.4 * existence_score + 0.6 * avg_content;
213
214 ConfigQualityResult {
215 score,
216 has_claude_md,
217 has_claude_instructions,
218 has_readme,
219 details,
220 llm_adjusted: false,
221 }
222 }
223
224 fn analyze_file(file: &ConfigFile, known_files: &HashSet<String>) -> ConfigDetail {
225 let content = &file.content;
226 let chars = content.len();
227
228 let length_score = if chars < 500 {
230 chars as f64 / 500.0 * 0.4
231 } else if chars < 2000 {
232 0.4 + (chars - 500) as f64 / 1500.0 * 0.3
233 } else if chars < 5000 {
234 0.7 + (chars - 2000) as f64 / 3000.0 * 0.2
235 } else {
236 0.9 + (0.1_f64).min((chars - 5000) as f64 / 5000.0 * 0.1)
237 };
238
239 let heading_count = content.lines().filter(|l| l.starts_with('#')).count();
241 let code_block_count = content.matches("```").count() / 2;
242 let list_count = content.lines().filter(|l| {
243 let trimmed = l.trim();
244 trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("1.")
245 }).count();
246 let structure_elements = heading_count + code_block_count + list_count;
247 let structure_score = (structure_elements as f64 / 15.0).min(1.0);
248
249 let specificity_score = Self::score_specificity(content);
251
252 let actionable_keywords = ["MUST", "NEVER", "ALWAYS", "REQUIRED", "IMPORTANT", "SHALL", "DO NOT"];
254 let actionable_count: usize = actionable_keywords
255 .iter()
256 .map(|kw| content.matches(kw).count())
257 .sum();
258 let actionable_score = (actionable_count as f64 / 10.0).min(1.0);
259
260 let file_refs_score = Self::score_file_refs(content, known_files);
262
263 let shell_pattern = Regex::new(r"```(?:bash|sh|shell|zsh)\n").unwrap();
265 let shell_blocks = shell_pattern.find_iter(content).count();
266 let shell_commands_score = (shell_blocks as f64 / 3.0).min(1.0);
267
268 let recency_score = match file.days_since_modified {
270 Some(days) if days <= 7 => 1.0,
271 Some(days) if days <= 30 => 1.0 - 0.4 * ((days - 7) as f64 / 23.0),
272 Some(days) if days <= 90 => 0.6 - 0.3 * ((days - 30) as f64 / 60.0),
273 Some(_) => 0.2,
274 None => 0.5,
275 };
276
277 ConfigDetail {
278 file: file.relative_path.clone(),
279 length_score,
280 structure_score,
281 specificity_score,
282 actionable_score,
283 file_refs_score,
284 shell_commands_score,
285 recency_score,
286 llm_quality_score: None,
287 }
288 }
289
290 fn score_specificity(content: &str) -> f64 {
298 let path_re = Regex::new(
300 r"(?:src/|lib/|docs/|\.claude/|components/|app/|tests/|crates/)?[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\.[a-zA-Z]{1,6}"
301 ).unwrap();
302 let real_paths = path_re.find_iter(content).count();
303 let signal_a = (real_paths as f64 / 15.0).min(1.0);
304
305 let ident_re = Regex::new(r"`([a-z][a-zA-Z0-9_]{2,})`").unwrap();
307 let common_words: HashSet<&str> = [
308 "the", "and", "for", "not", "you", "all", "can", "has", "use",
309 "this", "that", "with", "from", "will", "your", "code", "file",
310 "true", "false", "null", "none", "some", "each", "when", "then",
311 ].iter().copied().collect();
312 let identifiers: HashSet<String> = ident_re
313 .captures_iter(content)
314 .filter_map(|cap| {
315 let id = cap.get(1)?.as_str().to_lowercase();
316 if common_words.contains(id.as_str()) { None } else { Some(id) }
317 })
318 .collect();
319 let signal_b = (identifiers.len() as f64 / 10.0).min(1.0);
320
321 let words: Vec<&str> = content
323 .split_whitespace()
324 .filter(|w| w.len() > 3)
325 .collect();
326 let signal_c = if words.is_empty() {
327 0.0
328 } else {
329 let unique: HashSet<&str> = words.iter().copied().collect();
330 (unique.len() as f64 / words.len() as f64).min(1.0)
331 };
332
333 let generic_phrases = [
335 "write clean code", "follow best practices", "be helpful",
336 "use typescript", "write good code", "keep it simple",
337 "be concise", "write tests", "follow conventions",
338 ];
339 let content_lower = content.to_lowercase();
340 let generic_count = generic_phrases
341 .iter()
342 .filter(|phrase| content_lower.contains(*phrase))
343 .count();
344 let signal_d = 1.0 - (generic_count as f64 * 0.1).min(0.3);
345
346 0.35 * signal_a + 0.25 * signal_b + 0.25 * signal_c + 0.15 * signal_d
347 }
348
349 pub fn blend_agents_md_score(config_result: &mut ConfigQualityResult, agents_md_score: f64) {
355 if agents_md_score > 0.0 {
356 config_result.score = 0.70 * config_result.score + 0.30 * agents_md_score;
357 }
358 }
359
360 fn score_file_refs(content: &str, known_files: &HashSet<String>) -> f64 {
362 let path_re = Regex::new(r"`([a-zA-Z0-9_./-]+\.[a-zA-Z]{1,6})`").unwrap();
364 let refs: Vec<String> = path_re
365 .captures_iter(content)
366 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
367 .collect();
368
369 if refs.is_empty() {
370 return 0.5; }
372
373 let valid = refs.iter().filter(|r| known_files.contains(r.as_str())).count();
374 valid as f64 / refs.len() as f64
375 }
376}
377
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a detailed CLAUDE.md with rules, shell blocks and file references.
    const RICH_CLAUDE_MD: &str = r#"# Kardo Project

## Stack
- Next.js 16, React 19, TypeScript
- Tailwind CSS, shadcn/ui

## Rules

You MUST always run `npm run build` before committing.
You MUST NEVER edit `components/ui/*` files.
ALWAYS check types with `npx tsc --noEmit`.

```bash
npm run dev
npm run build
npm run lint
```

```bash
cargo test
```

## Files
- `src/lib/types.ts` — type definitions
- `docs/api.md` — API documentation
- `README.md` — project readme

## IMPORTANT

This is a REQUIRED reading for all contributors.
ALWAYS follow the coding standards.
NEVER push directly to main without review.
"#;

    /// Fixture: a shorter but still genuine CLAUDE.md.
    const REAL_CLAUDE_MD: &str = r#"# Kardo Project

## Stack
- Next.js 16, React 19, TypeScript
- Tailwind CSS, shadcn/ui

## Rules

You MUST always run `npm run build` before committing.
You MUST NEVER edit `components/ui/*` files.
ALWAYS check types with `npx tsc --noEmit`.

```bash
npm run dev
npm run build
```

## Files
- `src/lib/types.ts` — type definitions
- `docs/api.md` — API documentation
"#;

    /// Fixture: guidance full of concrete paths and identifiers.
    const SPECIFIC_GUIDANCE: &str = r#"
Edit `src/lib/types.ts` for type definitions.
Never modify `components/ui/button.tsx` directly.
Run `cargo test` in `crates/kardo-core/`.
The `ScoringEngine` reads from `analysis/config_quality.rs`.
Use `ConfigQualityAnalyzer` for scoring.
Check `docs/UI_DECISIONS.md` before changes.
The `parseMarkdown` function handles `src/parser/mod.rs`.
Always run `npx tsc --noEmit` before committing.
"#;

    /// Builds the set of known repository paths from string literals.
    fn make_known_files(files: &[&str]) -> HashSet<String> {
        files.iter().copied().map(String::from).collect()
    }

    /// Builds a `ConfigFile` with a known modification age.
    fn config_file(path: &str, content: &str, days: i64) -> ConfigFile {
        ConfigFile {
            relative_path: path.to_string(),
            content: content.to_string(),
            days_since_modified: Some(days),
        }
    }

    /// Runs the analyzer with an empty set of known files.
    fn analyze_without_known_files(files: &[ConfigFile]) -> ConfigQualityResult {
        ConfigQualityAnalyzer::analyze(files, &HashSet::new())
    }

    #[test]
    fn test_no_config_files() {
        let result = analyze_without_known_files(&[]);
        assert!(result.score.abs() < 0.01);
        assert!(!result.has_claude_md);
        assert!(!result.has_readme);
    }

    #[test]
    fn test_minimal_claude_md() {
        let files = [config_file("CLAUDE.md", "# Project\nShort description.", 0)];
        let result = analyze_without_known_files(&files);
        assert!(result.has_claude_md);
        assert!(result.score > 0.0);
        assert!(result.score < 0.7);
    }

    #[test]
    fn test_rich_claude_md() {
        let files = [config_file("CLAUDE.md", RICH_CLAUDE_MD, 2)];
        let known = make_known_files(&["src/lib/types.ts", "docs/api.md", "README.md"]);
        let result = ConfigQualityAnalyzer::analyze(&files, &known);
        assert!(result.has_claude_md);
        assert!(result.score > 0.55, "Rich CLAUDE.md should score high, got {}", result.score);
    }

    #[test]
    fn test_existence_scoring() {
        let files = [
            config_file("CLAUDE.md", "# Project\nBasic.", 0),
            config_file(".claude/instructions", "Instructions here.", 0),
            config_file("README.md", "# README\nProject info.", 0),
        ];
        let result = analyze_without_known_files(&files);
        assert!(result.has_claude_md);
        assert!(result.has_claude_instructions);
        assert!(result.has_readme);
    }

    #[test]
    fn test_specificity_generic_low_score() {
        let score = ConfigQualityAnalyzer::score_specificity(
            "Write clean code. Follow best practices. Be helpful. Use TypeScript. Keep it simple.",
        );
        assert!(score < 0.50, "Generic content should score low, got {}", score);
    }

    #[test]
    fn test_specificity_specific_high_score() {
        let score = ConfigQualityAnalyzer::score_specificity(SPECIFIC_GUIDANCE);
        assert!(score > 0.50, "Specific content should score high, got {}", score);
    }

    #[test]
    fn test_specificity_path_density() {
        let score_paths = ConfigQualityAnalyzer::score_specificity(
            "src/lib.rs, src/main.rs, docs/api.md, tests/test.rs, crates/core/mod.rs, components/ui/button.tsx, app/page.tsx, lib/types/index.ts",
        );
        let score_none = ConfigQualityAnalyzer::score_specificity(
            "This is a project. It does things. Write good code always.",
        );
        assert!(score_paths > score_none, "Path-rich should score higher ({} vs {})", score_paths, score_none);
    }

    #[test]
    fn test_specificity_generic_penalty() {
        let score_generic = ConfigQualityAnalyzer::score_specificity(
            "Write clean code. Follow best practices. Be helpful.",
        );
        let score_specific = ConfigQualityAnalyzer::score_specificity(
            "Configure `rustfmt` in `crates/core/.rustfmt.toml`. Run `cargo clippy`.",
        );
        assert!(score_specific > score_generic, "Non-generic should score higher ({} vs {})", score_specific, score_generic);
    }

    #[test]
    fn test_agents_resolution_replaces_short_reference() {
        let agents = "# My Agents\n\n## Agent 1\nDoes stuff.\n\n## Agent 2\nDoes more stuff.\n";
        let resolved = resolve_agents_reference("@AGENTS.md", Some(agents));
        assert!(resolved.contains("# My Agents"));
        assert!(resolved.contains("Resolved from:"));
        assert!(resolved.len() > 50);
    }

    #[test]
    fn test_agents_resolution_preserves_real_content() {
        let content = "# CLAUDE.md\n\nThis is a real CLAUDE.md with actual content that references AGENTS.md somewhere but is long enough to not be a reference.\n\n## Rules\nDo stuff.\n";
        let resolved = resolve_agents_reference(content, Some("# Agents content"));
        assert_eq!(resolved, content);
    }

    #[test]
    fn test_agents_resolution_no_agents_file() {
        let content = "@AGENTS.md";
        assert_eq!(resolve_agents_reference(content, None), content);
    }

    #[test]
    fn test_stale_config() {
        let files = [config_file(
            "CLAUDE.md",
            "# Project\nSome rules and instructions here that are fairly long to test length scoring properly with enough content.",
            120,
        )];
        let result = analyze_without_known_files(&files);
        let detail = &result.details[0];
        assert!(detail.recency_score < 0.5);
    }

    #[test]
    fn test_is_meaningful_content_trivial() {
        for stub in ["@AGENTS.md", "# Project\nShort description.", ""] {
            assert!(!is_meaningful_content(stub));
        }
    }

    #[test]
    fn test_is_meaningful_content_real() {
        assert!(is_meaningful_content(REAL_CLAUDE_MD));
    }

    #[test]
    fn test_empty_claude_md_does_not_inflate_existence_score() {
        let files = [config_file("CLAUDE.md", "@AGENTS.md", 0)];
        let result = analyze_without_known_files(&files);
        assert!(result.has_claude_md);
        assert!(result.score < 0.2, "Trivial CLAUDE.md should not inflate score, got {}", result.score);
    }
}