//! Domain-specific validation for agent instruction files.
//!
//! Contains checks for actionable content and project structure paths.
//! Moved from `instruction-files::audit` to centralise rule validation.

use agent_kit::audit_common::{AuditConfig, Issue, is_agent_file};
use once_cell::sync::Lazy;
use regex::Regex;
use std::path::Path;

11static SKIP_PATHS: Lazy<std::collections::HashSet<&str>> =
12    Lazy::new(|| [".env"].iter().copied().collect());
13
14static IMPERATIVE_RE: Lazy<Regex> = Lazy::new(|| {
15    Regex::new(
16        r"(?i)\b(use|add|create|run|do|don't|never|must|should|avoid|prefer|ensure|keep|set)\b",
17    )
18    .unwrap()
19});
20
21static TABLE_SEP_RE: Lazy<Regex> = Lazy::new(|| {
22    Regex::new(r"^\|[\s:]*-+[\s:]*(\|[\s:]*-+[\s:]*)*\|?\s*$").unwrap()
23});
24
25const INFORMATIONAL_HEADINGS: &[&str] = &[
26    "project structure",
27    "directory layout",
28    "architecture",
29    "overview",
30    "tech stack",
31    "sources",
32    "bibliography",
33    "references",
34    "available tools",
35    "resources",
36];
37
/// Parse file paths from the first fenced block of a "## Project Structure" section.
///
/// Returns `(line_number, path)` pairs, where `line_number` is the 1-based line
/// of the entry in `content` and `path` is the entry joined onto its parent
/// directories.
///
/// Parsing rules:
/// - Only the first fenced (```` ``` ````) block after a line starting with
///   `## Project Structure` is read. The search stops at the next level-1 or
///   level-2 heading, and parsing stops at the closing fence.
/// - Nesting is derived from indentation. Leading whitespace and Unicode
///   box-drawing characters (`├── `, `│   └── `) both count as indentation, so
///   plain indented trees and `tree`-style output are handled alike.
/// - Everything from the first `#` on a line is treated as a comment.
/// - Entries ending in `/` are directories: they prefix the entries nested
///   under them and are not returned themselves.
/// - `name -> target` (symlink notation) is treated as the directory `name/`.
pub fn extract_tree_paths(content: &str) -> Vec<(usize, String)> {
    /// Characters that only draw or indent the tree and are never part of a name.
    fn is_tree_decoration(c: char) -> bool {
        c.is_whitespace() || ('\u{2500}'..='\u{257F}').contains(&c)
    }

    let mut results = Vec::new();
    let mut in_section = false;
    let mut in_block = false;
    // Directories currently open, outermost first: (indent width, "name/").
    let mut stack: Vec<(usize, String)> = Vec::new();

    for (idx, line) in content.lines().enumerate() {
        if line.starts_with("## Project Structure") {
            in_section = true;
            continue;
        }
        if !in_section {
            continue;
        }

        let is_fence = line.trim().starts_with("```");
        if !in_block {
            if is_fence {
                in_block = true;
            } else if line.starts_with("## ") || line.starts_with("# ") {
                // A sibling or parent heading closes the section before any block.
                break;
            }
            continue;
        }
        if is_fence {
            break;
        }

        // Indentation is measured in characters so that multi-byte box-drawing
        // glyphs count the same as the spaces they line up with.
        let entry = line.trim_start_matches(is_tree_decoration);
        let indent = line[..line.len() - entry.len()].chars().count();

        let raw = entry.split('#').next().unwrap_or("").trim();
        if raw.is_empty() {
            continue;
        }
        let name = match raw.split_once(" -> ") {
            Some((link, _target)) => format!("{}/", link.trim()),
            None => raw.to_string(),
        };

        // Close every directory that is not an ancestor of this entry.
        while stack.last().is_some_and(|(depth, _)| *depth >= indent) {
            stack.pop();
        }

        if name.ends_with('/') {
            stack.push((indent, name));
        } else {
            let mut full: String = stack.iter().map(|(_, dir)| dir.as_str()).collect();
            full.push_str(&name);
            results.push((idx + 1, full));
        }
    }

    results
}

101/// Check that file paths referenced in "## Project Structure" blocks exist.
102pub fn check_tree_paths(rel: &str, content: &str, root: &Path) -> Vec<Issue> {
103    let mut issues = Vec::new();
104    let bracket_re = Regex::new(r"\[.*?]").unwrap();
105    for (line_no, path) in extract_tree_paths(content) {
106        if bracket_re.is_match(&path) {
107            continue;
108        }
109        if SKIP_PATHS.contains(path.as_str()) {
110            continue;
111        }
112        if !root.join(&path).exists() {
113            issues.push(Issue {
114                file: rel.to_string(),
115                line: line_no,
116                end_line: 0,
117                message: format!("Referenced path does not exist: {}", path),
118                warning: false,
119            });
120        }
121    }
122    issues
123}
124
/// Return the heading level (1-6) and title text for a markdown heading line.
///
/// A heading is one to six `#` characters at the very start of the line,
/// followed by a space or a tab. Anything else — no `#`, more than six, or no
/// separator after the hashes — yields `None`.
///
/// The title is trimmed. An optional ATX closing sequence (`## Title ##`) is
/// removed, but trailing `#` characters that are attached to the text
/// (`## C#`) are kept because they are part of the title.
fn heading_level(line: &str) -> Option<(usize, &str)> {
    let level = line.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }
    // `#` is one byte, so `level` is always a valid char boundary.
    let rest = &line[level..];
    if !rest.starts_with([' ', '\t']) {
        return None;
    }

    let title = rest.trim();
    let bare = title.trim_end_matches('#');
    let had_closing_hashes = bare.len() != title.len();
    // The closing run only counts when it stands alone or follows whitespace.
    let title = if had_closing_hashes && (bare.is_empty() || bare.ends_with([' ', '\t'])) {
        bare.trim_end()
    } else {
        title
    };
    Some((level, title))
}

/// A bullet line that is primarily a link or backtick-enclosed identifier.
///
/// The line must start at column 0 with a bullet marker (`- `, `* ` or `+ `)
/// and the item text must begin with `[` or `` ` ``. Indented bullets do not
/// match.
///
/// Task-list items (`- [ ] todo`, `- [x] done`) also begin with `[`, but they
/// are instructions rather than links, so they are excluded.
fn is_link_bullet(line: &str) -> bool {
    /// True when `rest` starts with a task checkbox followed by whitespace or
    /// the end of the line. `[x](url)` is a link, not a checkbox.
    fn is_task_checkbox(rest: &str) -> bool {
        ["[ ]", "[x]", "[X]"]
            .iter()
            .filter_map(|checkbox| rest.strip_prefix(*checkbox))
            .any(|after| after.is_empty() || after.starts_with(char::is_whitespace))
    }

    let item = line
        .strip_prefix("- ")
        .or_else(|| line.strip_prefix("* "))
        .or_else(|| line.strip_prefix("+ "));
    let Some(rest) = item else {
        return false;
    };
    if is_task_checkbox(rest) {
        return false;
    }
    rest.starts_with('[') || rest.starts_with('`')
}

148/// A line that can appear within a link-heavy list block.
149fn is_list_context(line: &str) -> bool {
150    line.trim().is_empty()
151        || line.starts_with("### ")
152        || line.starts_with("#### ")
153        || is_link_bullet(line)
154}
155
156/// Check that agent instruction files contain actionable content.
157pub fn check_actionable(rel: &str, content: &str, config: &AuditConfig) -> Vec<Issue> {
158    if !is_agent_file(rel, config) {
159        return vec![];
160    }
161
162    let lines: Vec<&str> = content.lines().collect();
163    let mut issues = Vec::new();
164
165    // 1. Informational section headings
166    for (i, line) in lines.iter().enumerate() {
167        if let Some((level, title)) = heading_level(line) {
168            let title_lower = title.to_lowercase();
169            if INFORMATIONAL_HEADINGS.iter().any(|h| title_lower == *h) {
170                let mut end = lines.len();
171                for (j, line_j) in lines.iter().enumerate().skip(i + 1) {
172                    if let Some((next_level, _)) = heading_level(line_j)
173                        && next_level <= level
174                    {
175                        end = j;
176                        break;
177                    }
178                }
179                while end > i + 1 && lines[end - 1].trim().is_empty() {
180                    end -= 1;
181                }
182                issues.push(Issue {
183                    file: rel.to_string(),
184                    line: i + 1,
185                    end_line: end,
186                    message: format!(
187                        "Informational section \"{}\" \u{2014} consider moving to README.md",
188                        title
189                    ),
190                    warning: true,
191                });
192            }
193        }
194    }
195
196    // 2. Large fenced code blocks (> 8 lines) without imperative verb in 2 preceding lines
197    {
198        let mut i = 0;
199        while i < lines.len() {
200            if lines[i].trim().starts_with("```") {
201                let start = i;
202                i += 1;
203                while i < lines.len() && !lines[i].trim().starts_with("```") {
204                    i += 1;
205                }
206                let close = i;
207                let block_lines = close - start - 1;
208                if block_lines > 8 {
209                    let check_start = start.saturating_sub(2);
210                    let preceding = &lines[check_start..start];
211                    let has_imperative = preceding.iter().any(|l| IMPERATIVE_RE.is_match(l));
212                    if !has_imperative {
213                        issues.push(Issue {
214                            file: rel.to_string(),
215                            line: start + 1,
216                            end_line: if close < lines.len() {
217                                close + 1
218                            } else {
219                                close
220                            },
221                            message: format!(
222                                "Large code block ({} lines) without imperative context \u{2014} consider moving to README.md",
223                                block_lines
224                            ),
225                            warning: true,
226                        });
227                    }
228                }
229            }
230            i += 1;
231        }
232    }
233
234    // 3. Large tables (> 5 non-separator rows)
235    {
236        let mut i = 0;
237        while i < lines.len() {
238            if lines[i].trim_start().starts_with('|') {
239                let start = i;
240                let mut rows = 0;
241                while i < lines.len() && lines[i].trim_start().starts_with('|') {
242                    if !TABLE_SEP_RE.is_match(lines[i].trim()) {
243                        rows += 1;
244                    }
245                    i += 1;
246                }
247                if rows > 5 {
248                    issues.push(Issue {
249                        file: rel.to_string(),
250                        line: start + 1,
251                        end_line: i,
252                        message: format!(
253                            "Large table ({} rows) \u{2014} consider moving to README.md",
254                            rows
255                        ),
256                        warning: true,
257                    });
258                }
259                continue;
260            }
261            i += 1;
262        }
263    }
264
265    // 4. Link-heavy bullet lists (> 10 consecutive link/backtick bullets)
266    {
267        let mut i = 0;
268        while i < lines.len() {
269            if is_link_bullet(lines[i]) {
270                let start = i;
271                let mut count = 0;
272                while i < lines.len() && is_list_context(lines[i]) {
273                    if is_link_bullet(lines[i]) {
274                        count += 1;
275                    }
276                    i += 1;
277                }
278                let mut end = i;
279                while end > start && lines[end - 1].trim().is_empty() {
280                    end -= 1;
281                }
282                if count > 10 {
283                    issues.push(Issue {
284                        file: rel.to_string(),
285                        line: start + 1,
286                        end_line: end,
287                        message: format!(
288                            "Link-heavy list ({} items) \u{2014} consider moving to README.md",
289                            count
290                        ),
291                        warning: true,
292                    });
293                }
294                continue;
295            }
296            i += 1;
297        }
298    }
299
300    issues
301}
302
/// Unit tests for tree-path extraction, path checking, the small Markdown
/// helpers and the actionable-content rules.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // --- extract_tree_paths ---

    #[test]
    fn extract_tree_paths_basic() {
        let content = "\
## Project Structure

```
src/
  main.rs
  lib.rs
```
";
        let paths = extract_tree_paths(content);
        assert_eq!(paths.len(), 2);
        assert_eq!(paths[0].1, "src/main.rs");
        assert_eq!(paths[1].1, "src/lib.rs");
    }

    #[test]
    fn extract_tree_paths_nested() {
        let content = "\
## Project Structure

```
src/
  agent/
    mod.rs
    claude.rs
  main.rs
```
";
        let paths = extract_tree_paths(content);
        assert_eq!(paths.len(), 3);
        assert_eq!(paths[0].1, "src/agent/mod.rs");
        assert_eq!(paths[1].1, "src/agent/claude.rs");
        assert_eq!(paths[2].1, "src/main.rs");
    }

    #[test]
    fn extract_tree_paths_symlink() {
        // A symlink entry is treated as a directory, so it is not returned.
        let content = "\
## Project Structure

```
mail -> ../data/mail
src/
  main.rs
```
";
        let paths = extract_tree_paths(content);
        assert_eq!(paths.len(), 1);
        assert_eq!(paths[0].1, "src/main.rs");
    }

    #[test]
    fn extract_tree_paths_with_comments() {
        let content = "\
## Project Structure

```
src/
  main.rs  # entry point
  lib.rs   # library
```
";
        let paths = extract_tree_paths(content);
        assert_eq!(paths.len(), 2);
        assert_eq!(paths[0].1, "src/main.rs");
        assert_eq!(paths[1].1, "src/lib.rs");
    }

    #[test]
    fn extract_tree_paths_no_section() {
        let content = "# Just a heading\n\nSome text.\n";
        let paths = extract_tree_paths(content);
        assert!(paths.is_empty());
    }

    #[test]
    fn extract_tree_paths_empty_block() {
        let content = "\
## Project Structure

```
```
";
        let paths = extract_tree_paths(content);
        assert!(paths.is_empty());
    }

    #[test]
    fn extract_tree_paths_stops_at_next_section() {
        let content = "\
## Project Structure

```
src/
  main.rs
```

## Other Section

Some text.
";
        let paths = extract_tree_paths(content);
        assert_eq!(paths.len(), 1);
        assert_eq!(paths[0].1, "src/main.rs");
    }

    #[test]
    fn extract_tree_paths_line_numbers() {
        // Line numbers are 1-based and refer to the whole document.
        let content = "\
## Project Structure

```
Cargo.toml
src/
  main.rs
```
";
        let paths = extract_tree_paths(content);
        assert_eq!(paths[0], (4, "Cargo.toml".to_string()));
        assert_eq!(paths[1], (6, "src/main.rs".to_string()));
    }

    // --- check_tree_paths ---

    #[test]
    fn check_tree_paths_existing() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/main.rs"), "fn main() {}").unwrap();

        let content = "\
## Project Structure

```
src/
  main.rs
```
";
        let issues = check_tree_paths("CLAUDE.md", content, root);
        assert!(issues.is_empty());
    }

    #[test]
    fn check_tree_paths_missing() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();

        let content = "\
## Project Structure

```
src/
  missing.rs
```
";
        let issues = check_tree_paths("CLAUDE.md", content, root);
        assert_eq!(issues.len(), 1);
        assert!(issues[0].message.contains("missing.rs"));
        assert!(!issues[0].warning);
    }

    #[test]
    fn check_tree_paths_skips_brackets() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();

        let content = "\
## Project Structure

```
src/
  [generated files]
```
";
        let issues = check_tree_paths("CLAUDE.md", content, root);
        assert!(issues.is_empty());
    }

    #[test]
    fn check_tree_paths_skips_env() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();

        let content = "\
## Project Structure

```
.env
```
";
        let issues = check_tree_paths("CLAUDE.md", content, root);
        assert!(issues.is_empty());
    }

    // --- heading_level ---

    #[test]
    fn heading_level_basic() {
        assert_eq!(heading_level("# Title"), Some((1, "Title")));
        assert_eq!(heading_level("## Section"), Some((2, "Section")));
        assert_eq!(heading_level("### Sub"), Some((3, "Sub")));
        assert_eq!(heading_level("###### Deep"), Some((6, "Deep")));
    }

    #[test]
    fn heading_level_rejects_invalid() {
        assert_eq!(heading_level("Not a heading"), None);
        assert_eq!(heading_level("##NoSpace"), None);
        assert_eq!(heading_level("####### Too deep"), None);
        assert_eq!(heading_level(""), None);
    }

    // --- is_link_bullet ---

    #[test]
    fn is_link_bullet_matches() {
        assert!(is_link_bullet("- [link](url)"));
        assert!(is_link_bullet("- `code` description"));
        assert!(is_link_bullet("* [link](url)"));
        assert!(is_link_bullet("* `code`"));
    }

    #[test]
    fn is_link_bullet_rejects() {
        assert!(!is_link_bullet("- plain text"));
        assert!(!is_link_bullet("not a bullet"));
        assert!(!is_link_bullet("  - indented"));
    }

    // --- is_list_context ---

    #[test]
    fn is_list_context_matches() {
        assert!(is_list_context(""));
        assert!(is_list_context("   "));
        assert!(is_list_context("### Sub heading"));
        assert!(is_list_context("#### Deep heading"));
        assert!(is_list_context("- [link](url)"));
    }

    #[test]
    fn is_list_context_rejects() {
        assert!(!is_list_context("- plain text"));
        assert!(!is_list_context("## Section"));
        assert!(!is_list_context("some paragraph"));
    }

    // --- check_actionable ---

    #[test]
    fn check_actionable_skips_non_agent_files() {
        let config = AuditConfig::agent_doc();
        let issues = check_actionable("README.md", "## Overview\n\nSome overview.\n", &config);
        assert!(issues.is_empty());
    }

    #[test]
    fn check_actionable_informational_heading() {
        let config = AuditConfig::agent_doc();
        let content = "# Doc\n\n## Overview\n\nSome overview text.\n\n## Rules\n\nDo this.\n";
        let issues = check_actionable("CLAUDE.md", content, &config);
        assert_eq!(issues.len(), 1);
        assert!(issues[0].message.contains("Informational section"));
        assert!(issues[0].message.contains("Overview"));
        assert!(issues[0].warning);
    }

    #[test]
    fn check_actionable_no_informational_heading() {
        let config = AuditConfig::agent_doc();
        let content = "# Doc\n\n## Conventions\n\nUse serde.\n";
        let issues = check_actionable("AGENTS.md", content, &config);
        assert!(issues.is_empty());
    }

    #[test]
    fn check_actionable_large_code_block_without_context() {
        let config = AuditConfig::agent_doc();
        let mut lines = vec!["# Doc".to_string(), "".to_string()];
        lines.push("```rust".to_string());
        for i in 0..10 {
            lines.push(format!("let x{} = {};", i, i));
        }
        lines.push("```".to_string());
        let content = lines.join("\n");

        let issues = check_actionable("CLAUDE.md", &content, &config);
        assert_eq!(issues.len(), 1);
        assert!(issues[0].message.contains("Large code block"));
        assert!(issues[0].warning);
    }

    #[test]
    fn check_actionable_large_code_block_with_imperative() {
        let config = AuditConfig::agent_doc();
        let mut lines = vec![
            "# Doc".to_string(),
            "".to_string(),
            "Use the following pattern:".to_string(),
        ];
        lines.push("```rust".to_string());
        for i in 0..10 {
            lines.push(format!("let x{} = {};", i, i));
        }
        lines.push("```".to_string());
        let content = lines.join("\n");

        let issues = check_actionable("CLAUDE.md", &content, &config);
        assert!(issues.is_empty());
    }

    #[test]
    fn check_actionable_small_code_block_ok() {
        let config = AuditConfig::agent_doc();
        let content = "# Doc\n\n```\nfoo\nbar\n```\n";
        let issues = check_actionable("AGENTS.md", content, &config);
        assert!(issues.is_empty());
    }

    #[test]
    fn check_actionable_large_table() {
        let config = AuditConfig::agent_doc();
        let mut lines = vec!["# Doc".to_string(), "".to_string()];
        lines.push("| Col A | Col B |".to_string());
        lines.push("|-------|-------|".to_string());
        for i in 0..6 {
            lines.push(format!("| row{} | val{} |", i, i));
        }
        let content = lines.join("\n");

        let issues = check_actionable("CLAUDE.md", &content, &config);
        assert_eq!(issues.len(), 1);
        assert!(issues[0].message.contains("Large table"));
        assert!(issues[0].warning);
    }

    #[test]
    fn check_actionable_small_table_ok() {
        let config = AuditConfig::agent_doc();
        let content = "\
# Doc

| A | B |
|---|---|
| 1 | 2 |
| 3 | 4 |
";
        let issues = check_actionable("SKILL.md", content, &config);
        assert!(issues.is_empty());
    }

    #[test]
    fn check_actionable_link_heavy_list() {
        let config = AuditConfig::agent_doc();
        let mut lines = vec!["# Doc".to_string(), "".to_string()];
        for i in 0..12 {
            lines.push(format!("- [link{}](https://example.com/{})", i, i));
        }
        let content = lines.join("\n");

        let issues = check_actionable("CLAUDE.md", &content, &config);
        assert_eq!(issues.len(), 1);
        assert!(issues[0].message.contains("Link-heavy list"));
        assert!(issues[0].warning);
    }

    #[test]
    fn check_actionable_short_link_list_ok() {
        let config = AuditConfig::agent_doc();
        let mut lines = vec!["# Doc".to_string(), "".to_string()];
        for i in 0..5 {
            lines.push(format!("- [link{}](https://example.com/{})", i, i));
        }
        let content = lines.join("\n");

        let issues = check_actionable("AGENTS.md", &content, &config);
        assert!(issues.is_empty());
    }

    #[test]
    fn check_actionable_claude_md_skipped_in_corky_config() {
        let config = AuditConfig::corky();
        let content = "# Doc\n\n## Overview\n\nSome overview.\n";
        let issues = check_actionable("CLAUDE.md", content, &config);
        assert!(issues.is_empty()); // CLAUDE.md is not an agent file in corky config
    }
}