// llmwiki_tooling/cmd/agent.rs

1use std::collections::HashMap;
2
3use crate::error::WikiError;
4use crate::frontmatter;
5use crate::parse;
6use crate::wiki::WikiRoot;
7
8use super::{DirStats, detect_mirror_candidates, is_markdown_file};
9
/// Output the setup workflow prompt for an LLM agent.
///
/// Prints a markdown workflow to stdout. The middle portion adapts to
/// whether a `wiki.toml` already exists at the wiki root: an existing
/// config gets a short validate-and-iterate path; otherwise the full
/// scan/init/customize walkthrough (Steps 1-5) is printed. Steps 6 and 7
/// (automated linting + project documentation) are printed in both cases.
pub fn setup(root: &WikiRoot) -> Result<(), WikiError> {
    let has_config = root.path().join("wiki.toml").is_file();
    // Embedded in the heading so agents can tell which CLI version
    // produced the instructions they are following.
    let version = env!("CARGO_PKG_VERSION");

    // Common preamble: title + how to discover the CLI surface.
    print!(
        r#"## Wiki Tool Setup (v{version})

You are configuring a wiki for use with the `wiki` CLI tool.

### Discover available commands

Run `wiki --help` to see all top-level commands.
Run `wiki <command> --help` for subcommand details (e.g., `wiki links --help`).

"#
    );

    if has_config {
        // Existing config: skip scaffolding, go straight to validation.
        print!(
            r#"### wiki.toml already exists

A wiki.toml is present at the wiki root. Skip to validation.

1. Read the existing wiki.toml to understand the current configuration.
2. Run `wiki scan` to see the actual wiki structure and check for mismatches.
3. Run `wiki lint` and iterate:
   - Real content problem -> fix the wiki content
   - Config too strict or wrong scope -> adjust wiki.toml
   - Uncertain -> ask the user
4. Run `wiki links check` to verify auto-linking candidates look right.
5. Once everything is clean, proceed to Step 6 (automated linting) and Step 7 (persist).

"#
        );
    } else {
        // No config yet: full bootstrap workflow (Steps 1-5).
        print!(
            r#"### Step 1: Scan the wiki structure

Run: `wiki scan`

This outputs per-directory statistics: file counts, frontmatter field coverage,
common section headings, and detected mirror candidates.

### Step 2: Learn the config schema

Run: `wiki setup example-config`

This outputs a complete wiki.toml with every option, annotated with comments.
Study it to understand what's available.

### Step 3: Generate and customize wiki.toml

Run: `wiki setup init`

This generates a starting-point wiki.toml. Edit it to customize:
- Set `autolink = false` on directories whose page names are too long or specific
  to be useful auto-link patterns (dates, identifiers, compound slugs)
- Add `[[rules]]` for required sections, required frontmatter, mirror parity
- Add citation patterns if the wiki tracks references to external sources
- Adjust `[checks]` severities if needed

### Step 4: Validate iteratively

Run: `wiki lint`

For each finding:
- Real content problem -> fix the wiki content
- Config too strict or wrong scope -> adjust wiki.toml
- Uncertain -> ask the user

Use `wiki lint --severity error` to focus on blocking issues first.
Use `wiki lint --severity warn` to review advisories separately.

Repeat until `wiki lint` exits clean.

### Step 5: Verify commands

Run and verify output makes sense:
- `wiki links check` — bare mentions should be genuine misses, not false positives
- `wiki links broken` — should be empty if the wiki is healthy
- `wiki refs to <pick a page from the wiki>` — verify the link graph looks right
- Review `wiki scan` output for inconsistent section headings across directories
  and use `wiki sections rename` to standardize them

"#
        );
    }

    // Shared tail: automation + documentation steps, printed either way.
    print!(
        r#"### Step 6: Set up automated linting

Configure `wiki lint` to run automatically before commits. Options:
- Git pre-commit hook (`.githooks/pre-commit` or `.git/hooks/pre-commit`)
- Agent hook (e.g., Claude Code `pre-commit` hook in `.claude/settings.json`)
- Both, if the wiki is edited by agents and humans

Choose what fits this project's setup.

### Step 7: Update project documentation

Check if project documentation (CLAUDE.md, AGENTS.md, .cursorrules, or equivalent)
already references wiki tooling commands.

If it does:
- Update command references to match the current CLI (`wiki --help`)
- Remove references to commands that no longer exist
- Verify workflow instructions use the correct command names and flags

If it doesn't:
- Add a tooling section documenting the key commands and when to use them
- Integrate commands into existing workflow documentation where relevant
  (e.g., "run `wiki links fix --write` after ingest" in an ingest workflow)

Key commands the documentation should cover:
- `wiki lint` — structural integrity check (before commits)
- `wiki links check` / `wiki links fix --write` — bare mention detection (after page creation)
- `wiki rename <old> <new> --write` — page rename with reference update
- `wiki refs to <page>` — impact analysis before editing
- `wiki sections rename <old> <new> --write` — heading standardization
- `wiki setup prompt` — re-read these instructions
"#
    );

    Ok(())
}
136
137/// Scan wiki structure and output per-directory statistics.
138pub fn scan(root: &WikiRoot) -> Result<(), WikiError> {
139    let wiki_root = root.path();
140
141    // Find all directories containing .md files
142    let mut dir_stats: HashMap<String, DirStats> = HashMap::new();
143
144    for entry in ignore::WalkBuilder::new(wiki_root).hidden(false).build() {
145        let entry = entry.map_err(|e| WikiError::Walk {
146            path: wiki_root.to_path_buf(),
147            source: e,
148        })?;
149        let path = entry.path();
150        if !is_markdown_file(path) {
151            continue;
152        }
153
154        let rel_path = path.strip_prefix(wiki_root).unwrap_or(path);
155        let dir = rel_path
156            .parent()
157            .and_then(|p| p.to_str())
158            .unwrap_or(".")
159            .to_owned();
160
161        let stats = dir_stats.entry(dir).or_default();
162        stats.file_count += 1;
163
164        let source = std::fs::read_to_string(path).map_err(|e| WikiError::ReadFile {
165            path: path.to_path_buf(),
166            source: e,
167        })?;
168
169        // Frontmatter analysis
170        if let Ok(Some(fm)) = frontmatter::parse_frontmatter(&source)
171            && let serde_yml::Value::Mapping(map) = fm.data()
172        {
173            for key in map.keys() {
174                if let Some(key_str) = key.as_str() {
175                    *stats
176                        .frontmatter_fields
177                        .entry(key_str.to_owned())
178                        .or_insert(0) += 1;
179                }
180            }
181        }
182
183        // Section heading analysis (## level)
184        let headings = parse::extract_headings(&source);
185        for h in &headings {
186            if h.level == 2 {
187                *stats.section_headings.entry(h.text.clone()).or_insert(0) += 1;
188            }
189        }
190    }
191
192    // Sort directories for consistent output
193    let mut dirs: Vec<_> = dir_stats.into_iter().collect();
194    dirs.sort_by(|a, b| a.0.cmp(&b.0));
195
196    for (dir, stats) in &dirs {
197        let display_dir = if dir.is_empty() { "." } else { dir.as_str() };
198        println!(
199            "## Directory: {display_dir}/ ({} files)\n",
200            stats.file_count
201        );
202
203        if !stats.frontmatter_fields.is_empty() {
204            println!("Frontmatter fields:");
205            let mut fields: Vec<_> = stats.frontmatter_fields.iter().collect();
206            fields.sort_by(|a, b| b.1.cmp(a.1));
207            for (field, count) in &fields {
208                let pct = **count as f64 / stats.file_count as f64 * 100.0;
209                println!("  {field:20} {count}/{} ({pct:.0}%)", stats.file_count);
210            }
211        } else {
212            println!("  No frontmatter detected.");
213        }
214
215        if !stats.section_headings.is_empty() {
216            println!("\nSection headings (## level):");
217            let mut headings: Vec<_> = stats.section_headings.iter().collect();
218            headings.sort_by(|a, b| b.1.cmp(a.1));
219            for (heading, count) in headings.iter().take(10) {
220                let pct = **count as f64 / stats.file_count as f64 * 100.0;
221                println!(
222                    "  \"{heading:18}\" {count}/{} ({pct:.0}%)",
223                    stats.file_count
224                );
225            }
226            if headings.len() > 10 {
227                println!("  ... and {} more", headings.len() - 10);
228            }
229        }
230
231        println!();
232    }
233
234    // Detect mirror candidates: directories with matching file stems
235    let dir_counts: Vec<(String, usize)> = dirs
236        .iter()
237        .map(|(dir, stats)| (dir.clone(), stats.file_count))
238        .collect();
239    let mirror_candidates = detect_mirror_candidates(&dir_counts);
240    if !mirror_candidates.is_empty() {
241        println!("## Mirror candidates\n");
242        for (a, b, count) in &mirror_candidates {
243            println!("  {a}/ ({count} files) <-> {b}/ ({count} files)");
244        }
245        println!();
246    }
247
248    // Check for index file
249    for candidate in &["index.md", "README.md", "_index.md"] {
250        let path = wiki_root.join(candidate);
251        if path.is_file() {
252            let source = std::fs::read_to_string(&path).map_err(|e| WikiError::ReadFile {
253                path: path.clone(),
254                source: e,
255            })?;
256            let wikilinks = parse::extract_wikilinks(&source);
257            let unique_refs: std::collections::HashSet<&str> =
258                wikilinks.iter().map(|wl| wl.page.as_str()).collect();
259            println!(
260                "## Index: {candidate}\n  References {} unique page names via wikilinks\n",
261                unique_refs.len()
262            );
263            break;
264        }
265    }
266
267    Ok(())
268}
269
270/// Output a complete annotated wiki.toml with all options.
271pub fn example_config() {
272    let sections = [
273        (
274            "# wiki.toml — Complete configuration reference\n\
275             #\n\
276             # Every available option with explanatory comments.\n\
277             # In practice, only include settings that differ from defaults.\n",
278            build_index_section(),
279        ),
280        (
281            "# Declare which directories contain wiki pages. Each entry is recursive\n\
282             # (includes all subdirectories).\n\
283             #\n\
284             # When multiple entries overlap (parent + child), the most-specific path wins\n\
285             # for per-page settings. This is the intended override mechanism:\n\
286             #   path = \"wiki\"          (parent, sets defaults for all of wiki/)\n\
287             #   path = \"wiki/papers\"   (child, overrides settings for wiki/papers/)\n\
288             #\n\
289             # If no [[directories]] are declared:\n\
290             #   Defaults to \"wiki/\" with autolink = true.\n\
291             #\n\
292             # If ANY [[directories]] are declared, the default is replaced entirely.\n",
293            build_directories_section(),
294        ),
295        ("", build_linking_section()),
296        (
297            "# Wiki-wide structural checks. These apply to all pages regardless of directory.\n\
298             # Values: \"error\" (causes exit code 2), \"warn\" (prints but exits 0), \"off\"\n",
299            build_checks_section(),
300        ),
301        (
302            "# Parameterized rules scoped to specific directories. Each rule has a `check`\n\
303             # type and a `severity` (\"error\", \"warn\", or \"off\").\n\
304             #\n\
305             # The `dirs` field uses path-prefix matching:\n\
306             #   dirs = [\"wiki\"] matches any page under wiki/ (including subdirectories)\n\
307             #   dirs = [\"wiki/concepts\"] matches only pages under wiki/concepts/\n",
308            build_rules_section(),
309        ),
310    ];
311
312    for (comment, toml) in &sections {
313        if !comment.is_empty() {
314            println!("{comment}");
315        }
316        print!("{toml}");
317    }
318}
319
320fn toml_array(items: &[&str]) -> toml::Value {
321    toml::Value::Array(
322        items
323            .iter()
324            .map(|s| toml::Value::String(s.to_string()))
325            .collect(),
326    )
327}
328
329fn build_index_section() -> String {
330    let mut tbl = toml::Table::new();
331    tbl.insert(
332        "index".to_owned(),
333        toml::Value::String("index.md".to_owned()),
334    );
335
336    let mut out = String::new();
337    out.push_str("# Index file path, relative to wiki root.\n");
338    out.push_str(
339        "# Scanned for wikilinks (index-coverage check) but NOT treated as a wiki page.\n",
340    );
341    out.push_str("# Default: \"index.md\". Set to \"\" to disable index coverage.\n");
342    out.push_str(&toml::to_string_pretty(&tbl).unwrap());
343    out
344}
345
/// Render example `[[directories]]` entries, each optionally preceded by
/// an explanatory comment.
fn build_directories_section() -> String {
    // (path, autolink flag, leading comment — empty string for none)
    let entries = [
        (
            "wiki",
            true,
            "# autolink: pages here feed bare-mention auto-linking.\n# When true, filename stems become patterns for `wiki links check`.\n# Default: true\n",
        ),
        (
            "wiki/papers",
            false,
            "# Long, specific names are poor auto-link patterns — disable.\n",
        ),
        ("wiki/topics", false, ""),
    ];

    // An empty comment contributes nothing, so it can be interpolated
    // unconditionally.
    entries
        .into_iter()
        .map(|(path, autolink, comment)| {
            format!("{comment}[[directories]]\npath = \"{path}\"\nautolink = {autolink}\n\n")
        })
        .collect()
}
372
373fn build_linking_section() -> String {
374    let mut out = String::new();
375    out.push_str("[linking]\n");
376
377    out.push_str("# Page names to never auto-link, even in autolink=true directories.\n");
378    out.push_str("# Default: []\n");
379    let exclude = toml::Value::Array(vec![
380        toml::Value::String("the".to_owned()),
381        toml::Value::String("a".to_owned()),
382        toml::Value::String("an".to_owned()),
383    ]);
384    out.push_str(&format!("exclude = {exclude}\n\n"));
385
386    out.push_str("# Frontmatter field that pages can set to false to opt out of auto-linking.\n");
387    out.push_str("# Default: \"autolink\"\n");
388    out.push_str("autolink_field = \"autolink\"\n\n");
389
390    out
391}
392
/// Render the `[checks]` section with an explanatory comment above each
/// severity setting.
fn build_checks_section() -> String {
    // The section is fully static, so emit it as one raw literal.
    r#"[checks]
# Every [[wikilink]] must resolve to an existing page.
# Fragment references ([[page#heading]], [[page#^block]]) are also validated.
# Default: "error"
broken_links = "error"

# Every wiki page must have at least one inbound [[wikilink]] from another page.
# Default: "error"
orphan_pages = "error"

# Every wiki page must be referenced via [[wikilink]] in the index file.
# Only active if `index` is set and the file exists.
# Default: "error"
index_coverage = "error"

"#
    .to_owned()
}
415
/// Render example `[[rules]]` entries covering every rule type:
/// required-sections, required-frontmatter, mirror-parity, and
/// citation-pattern (both regex- and preset-based variants).
///
/// Array-valued fields go through `toml_array` so quoting stays correct;
/// regex patterns are emitted as TOML literal strings ('...') so their
/// backslashes need no TOML-level escaping.
fn build_rules_section() -> String {
    let mut out = String::new();

    // Required sections
    out.push_str("# --- Required sections ---\n");
    out.push_str("# Pages in the specified directories must contain these ## headings.\n\n");

    out.push_str("[[rules]]\ncheck = \"required-sections\"\n");
    out.push_str(&format!("dirs = {}\n", toml_array(&["wiki/concepts"])));
    out.push_str(&format!(
        "sections = {}\n",
        toml_array(&["See also", "Viability check"])
    ));
    out.push_str("severity = \"error\"\n\n");

    out.push_str("[[rules]]\ncheck = \"required-sections\"\n");
    out.push_str(&format!("dirs = {}\n", toml_array(&["wiki/topics"])));
    out.push_str(&format!("sections = {}\n", toml_array(&["See also"])));
    out.push_str("severity = \"warn\"\n\n");

    // Required frontmatter
    out.push_str("# --- Required frontmatter fields ---\n");
    out.push_str(
        "# Pages in the specified directories must have these YAML frontmatter fields.\n\n",
    );

    out.push_str("[[rules]]\ncheck = \"required-frontmatter\"\n");
    out.push_str(&format!(
        "dirs = {}\n",
        toml_array(&["wiki/concepts", "wiki/topics"])
    ));
    out.push_str(&format!(
        "fields = {}\n",
        toml_array(&["title", "tags", "date"])
    ));
    out.push_str("severity = \"error\"\n\n");

    // Papers additionally require a `sources` field.
    out.push_str("[[rules]]\ncheck = \"required-frontmatter\"\n");
    out.push_str(&format!("dirs = {}\n", toml_array(&["wiki/papers"])));
    out.push_str(&format!(
        "fields = {}\n",
        toml_array(&["title", "tags", "date", "sources"])
    ));
    out.push_str("severity = \"error\"\n\n");

    // Mirror parity
    out.push_str("# --- Mirror parity ---\n");
    out.push_str(
        "# Two directories must contain matching filenames (by stem, ignoring extension).\n",
    );
    out.push_str("# Useful for raw-source / processed-page pairs.\n");
    out.push_str("# Note: `right` does NOT need to be a declared [[directories]] entry.\n\n");

    out.push_str("[[rules]]\ncheck = \"mirror-parity\"\n");
    out.push_str("left = \"wiki/papers\"\nright = \"raw/papers\"\n");
    out.push_str("severity = \"error\"\n\n");

    // Citation patterns
    out.push_str("# --- Citation patterns ---\n");
    out.push_str("# Detect references in prose that should have corresponding wiki pages.\n");
    out.push_str("#\n");
    out.push_str("# Each pattern has a regex with a named capture group `id`.\n");
    out.push_str("# `match_in`: which directory to search for matching pages.\n");
    out.push_str("# `match_mode`:\n");
    out.push_str(
        "#   \"content\"  - search page file contents for the captured ID string (default)\n",
    );
    out.push_str("#   \"filename\" - check if a page with the captured ID as filename exists\n");
    out.push_str("#\n");
    out.push_str("# Use `preset` instead of `pattern` for built-in patterns:\n");
    out.push_str(
        "#   \"bold-method-year\" - matches **MethodName** (Author, YEAR), checks filenames\n\n",
    );

    // Regex-based: arXiv abstract URLs (captures the numeric arXiv id).
    out.push_str("[[rules]]\ncheck = \"citation-pattern\"\nname = \"arxiv\"\n");
    out.push_str(&format!(
        "dirs = {}\n",
        toml_array(&["wiki/concepts", "wiki/topics"])
    ));
    out.push_str("pattern = 'arxiv\\.org/abs/(?P<id>\\d{4}\\.\\d{4,5})'\n");
    out.push_str("match_in = \"wiki/papers\"\nmatch_mode = \"content\"\nseverity = \"warn\"\n\n");

    out.push_str("# Preset-based: no regex needed, preset bundles pattern + match_mode.\n");
    out.push_str("[[rules]]\ncheck = \"citation-pattern\"\nname = \"bold-method\"\n");
    out.push_str("preset = \"bold-method-year\"\n");
    out.push_str(&format!(
        "dirs = {}\n",
        toml_array(&["wiki/concepts", "wiki/topics"])
    ));
    out.push_str("match_in = \"wiki/papers\"\nseverity = \"warn\"\n\n");

    // Regex-based: DOI URLs anywhere under wiki/.
    out.push_str("[[rules]]\ncheck = \"citation-pattern\"\nname = \"doi\"\n");
    out.push_str(&format!("dirs = {}\n", toml_array(&["wiki"])));
    out.push_str("pattern = 'doi\\.org/(?P<id>10\\.\\d{4,}/[^\\s)]+)'\n");
    out.push_str("match_in = \"wiki/papers\"\nmatch_mode = \"content\"\nseverity = \"warn\"\n");

    out
}