1use std::collections::HashMap;
2
3use crate::error::WikiError;
4use crate::frontmatter;
5use crate::parse;
6use crate::wiki::WikiRoot;
7
8use super::{DirStats, detect_mirror_candidates, is_markdown_file};
9
/// Print agent-facing setup instructions for configuring a wiki with this CLI.
///
/// Writes a markdown prompt to stdout. The content branches on whether a
/// `wiki.toml` already exists at the wiki root: with a config, the prompt
/// skips straight to validation; without one, it walks the full bootstrap
/// flow (Steps 1-5). Steps 6 (automated linting) and 7 (project docs) are
/// printed in both cases.
///
/// # Errors
///
/// Currently always returns `Ok(())`; the `Result` return keeps the
/// signature consistent with the other subcommand entry points.
pub fn setup(root: &WikiRoot) -> Result<(), WikiError> {
    // Presence of wiki.toml selects which instruction variant is printed below.
    let has_config = root.path().join("wiki.toml").is_file();
    let version = env!("CARGO_PKG_VERSION");

    // Common header: tool version plus `--help` discovery pointers.
    print!(
        r#"## Wiki Tool Setup (v{version})

You are configuring a wiki for use with the `wiki` CLI tool.

### Discover available commands

Run `wiki --help` to see all top-level commands.
Run `wiki <command> --help` for subcommand details (e.g., `wiki links --help`).

"#
    );

    if has_config {
        // Existing config: skip bootstrap and go straight to validation.
        print!(
            r#"### wiki.toml already exists

A wiki.toml is present at the wiki root. Skip to validation.

1. Read the existing wiki.toml to understand the current configuration.
2. Run `wiki scan` to see the actual wiki structure and check for mismatches.
3. Run `wiki lint` and iterate:
   - Real content problem -> fix the wiki content
   - Config too strict or wrong scope -> adjust wiki.toml
   - Uncertain -> ask the user
4. Run `wiki links check` to verify auto-linking candidates look right.
5. Once everything is clean, proceed to Step 6 (automated linting) and Step 7 (persist).

"#
        );
    } else {
        // No config yet: full bootstrap flow (Steps 1-5).
        print!(
            r#"### Step 1: Scan the wiki structure

Run: `wiki scan`

This outputs per-directory statistics: file counts, frontmatter field coverage,
common section headings, and detected mirror candidates.

### Step 2: Learn the config schema

Run: `wiki setup example-config`

This outputs a complete wiki.toml with every option, annotated with comments.
Study it to understand what's available.

### Step 3: Generate and customize wiki.toml

Run: `wiki setup init`

This generates a starting-point wiki.toml. Edit it to customize:
- Set `autolink = false` on directories whose page names are too long or specific
  to be useful auto-link patterns (dates, identifiers, compound slugs)
- Add `[[rules]]` for required sections, required frontmatter, mirror parity
- Add citation patterns if the wiki tracks references to external sources
- Adjust `[checks]` severities if needed

### Step 4: Validate iteratively

Run: `wiki lint`

For each finding:
- Real content problem -> fix the wiki content
- Config too strict or wrong scope -> adjust wiki.toml
- Uncertain -> ask the user

Use `wiki lint --severity error` to focus on blocking issues first.
Use `wiki lint --severity warn` to review advisories separately.

Repeat until `wiki lint` exits clean.

### Step 5: Verify commands

Run and verify output makes sense:
- `wiki links check` — bare mentions should be genuine misses, not false positives
- `wiki links broken` — should be empty if the wiki is healthy
- `wiki refs to <pick a page from the wiki>` — verify the link graph looks right
- Review `wiki scan` output for inconsistent section headings across directories
  and use `wiki sections rename` to standardize them

"#
        );
    }

    // Steps 6-7 are shared by both paths.
    print!(
        r#"### Step 6: Set up automated linting

Configure `wiki lint` to run automatically before commits. Options:
- Git pre-commit hook (`.githooks/pre-commit` or `.git/hooks/pre-commit`)
- Agent hook (e.g., Claude Code `pre-commit` hook in `.claude/settings.json`)
- Both, if the wiki is edited by agents and humans

Choose what fits this project's setup.

### Step 7: Update project documentation

Check if project documentation (CLAUDE.md, AGENTS.md, .cursorrules, or equivalent)
already references wiki tooling commands.

If it does:
- Update command references to match the current CLI (`wiki --help`)
- Remove references to commands that no longer exist
- Verify workflow instructions use the correct command names and flags

If it doesn't:
- Add a tooling section documenting the key commands and when to use them
- Integrate commands into existing workflow documentation where relevant
  (e.g., "run `wiki links fix --write` after ingest" in an ingest workflow)

Key commands the documentation should cover:
- `wiki lint` — structural integrity check (before commits)
- `wiki links check` / `wiki links fix --write` — bare mention detection (after page creation)
- `wiki rename <old> <new> --write` — page rename with reference update
- `wiki refs to <page>` — impact analysis before editing
- `wiki sections rename <old> <new> --write` — heading standardization
- `wiki setup prompt` — re-read these instructions
"#
    );

    Ok(())
}
136
137pub fn scan(root: &WikiRoot) -> Result<(), WikiError> {
139 let wiki_root = root.path();
140
141 let mut dir_stats: HashMap<String, DirStats> = HashMap::new();
143
144 for entry in ignore::WalkBuilder::new(wiki_root).hidden(false).build() {
145 let entry = entry.map_err(|e| WikiError::Walk {
146 path: wiki_root.to_path_buf(),
147 source: e,
148 })?;
149 let path = entry.path();
150 if !is_markdown_file(path) {
151 continue;
152 }
153
154 let rel_path = path.strip_prefix(wiki_root).unwrap_or(path);
155 let dir = rel_path
156 .parent()
157 .and_then(|p| p.to_str())
158 .unwrap_or(".")
159 .to_owned();
160
161 let stats = dir_stats.entry(dir).or_default();
162 stats.file_count += 1;
163
164 let source = std::fs::read_to_string(path).map_err(|e| WikiError::ReadFile {
165 path: path.to_path_buf(),
166 source: e,
167 })?;
168
169 if let Ok(Some(fm)) = frontmatter::parse_frontmatter(&source)
171 && let serde_yml::Value::Mapping(map) = fm.data()
172 {
173 for key in map.keys() {
174 if let Some(key_str) = key.as_str() {
175 *stats
176 .frontmatter_fields
177 .entry(key_str.to_owned())
178 .or_insert(0) += 1;
179 }
180 }
181 }
182
183 let headings = parse::extract_headings(&source);
185 for h in &headings {
186 if h.level == 2 {
187 *stats.section_headings.entry(h.text.clone()).or_insert(0) += 1;
188 }
189 }
190 }
191
192 let mut dirs: Vec<_> = dir_stats.into_iter().collect();
194 dirs.sort_by(|a, b| a.0.cmp(&b.0));
195
196 for (dir, stats) in &dirs {
197 let display_dir = if dir.is_empty() { "." } else { dir.as_str() };
198 println!(
199 "## Directory: {display_dir}/ ({} files)\n",
200 stats.file_count
201 );
202
203 if !stats.frontmatter_fields.is_empty() {
204 println!("Frontmatter fields:");
205 let mut fields: Vec<_> = stats.frontmatter_fields.iter().collect();
206 fields.sort_by(|a, b| b.1.cmp(a.1));
207 for (field, count) in &fields {
208 let pct = **count as f64 / stats.file_count as f64 * 100.0;
209 println!(" {field:20} {count}/{} ({pct:.0}%)", stats.file_count);
210 }
211 } else {
212 println!(" No frontmatter detected.");
213 }
214
215 if !stats.section_headings.is_empty() {
216 println!("\nSection headings (## level):");
217 let mut headings: Vec<_> = stats.section_headings.iter().collect();
218 headings.sort_by(|a, b| b.1.cmp(a.1));
219 for (heading, count) in headings.iter().take(10) {
220 let pct = **count as f64 / stats.file_count as f64 * 100.0;
221 println!(
222 " \"{heading:18}\" {count}/{} ({pct:.0}%)",
223 stats.file_count
224 );
225 }
226 if headings.len() > 10 {
227 println!(" ... and {} more", headings.len() - 10);
228 }
229 }
230
231 println!();
232 }
233
234 let dir_counts: Vec<(String, usize)> = dirs
236 .iter()
237 .map(|(dir, stats)| (dir.clone(), stats.file_count))
238 .collect();
239 let mirror_candidates = detect_mirror_candidates(&dir_counts);
240 if !mirror_candidates.is_empty() {
241 println!("## Mirror candidates\n");
242 for (a, b, count) in &mirror_candidates {
243 println!(" {a}/ ({count} files) <-> {b}/ ({count} files)");
244 }
245 println!();
246 }
247
248 for candidate in &["index.md", "README.md", "_index.md"] {
250 let path = wiki_root.join(candidate);
251 if path.is_file() {
252 let source = std::fs::read_to_string(&path).map_err(|e| WikiError::ReadFile {
253 path: path.clone(),
254 source: e,
255 })?;
256 let wikilinks = parse::extract_wikilinks(&source);
257 let unique_refs: std::collections::HashSet<&str> =
258 wikilinks.iter().map(|wl| wl.page.as_str()).collect();
259 println!(
260 "## Index: {candidate}\n References {} unique page names via wikilinks\n",
261 unique_refs.len()
262 );
263 break;
264 }
265 }
266
267 Ok(())
268}
269
/// Print a complete, annotated example `wiki.toml` to stdout.
///
/// Pairs each rendered config section with a block of explanatory TOML
/// comments, emitted in file order so the combined output is a valid,
/// copy-pasteable configuration reference.
pub fn example_config() {
    // (comment preamble, rendered TOML section) pairs, in output order.
    let sections = [
        (
            "# wiki.toml — Complete configuration reference\n\
            #\n\
            # Every available option with explanatory comments.\n\
            # In practice, only include settings that differ from defaults.\n",
            build_index_section(),
        ),
        (
            "# Declare which directories contain wiki pages. Each entry is recursive\n\
            # (includes all subdirectories).\n\
            #\n\
            # When multiple entries overlap (parent + child), the most-specific path wins\n\
            # for per-page settings. This is the intended override mechanism:\n\
            #   path = \"wiki\" (parent, sets defaults for all of wiki/)\n\
            #   path = \"wiki/papers\" (child, overrides settings for wiki/papers/)\n\
            #\n\
            # If no [[directories]] are declared:\n\
            #   Defaults to \"wiki/\" with autolink = true.\n\
            #\n\
            # If ANY [[directories]] are declared, the default is replaced entirely.\n",
            build_directories_section(),
        ),
        ("", build_linking_section()),
        (
            "# Wiki-wide structural checks. These apply to all pages regardless of directory.\n\
            # Values: \"error\" (causes exit code 2), \"warn\" (prints but exits 0), \"off\"\n",
            build_checks_section(),
        ),
        (
            "# Parameterized rules scoped to specific directories. Each rule has a `check`\n\
            # type and a `severity` (\"error\", \"warn\", or \"off\").\n\
            #\n\
            # The `dirs` field uses path-prefix matching:\n\
            #   dirs = [\"wiki\"] matches any page under wiki/ (including subdirectories)\n\
            #   dirs = [\"wiki/concepts\"] matches only pages under wiki/concepts/\n",
            build_rules_section(),
        ),
    ];

    for (comment, toml) in &sections {
        // An empty preamble (e.g. the linking section) prints no leading comment.
        if !comment.is_empty() {
            println!("{comment}");
        }
        print!("{toml}");
    }
}
319
320fn toml_array(items: &[&str]) -> toml::Value {
321 toml::Value::Array(
322 items
323 .iter()
324 .map(|s| toml::Value::String(s.to_string()))
325 .collect(),
326 )
327}
328
329fn build_index_section() -> String {
330 let mut tbl = toml::Table::new();
331 tbl.insert(
332 "index".to_owned(),
333 toml::Value::String("index.md".to_owned()),
334 );
335
336 let mut out = String::new();
337 out.push_str("# Index file path, relative to wiki root.\n");
338 out.push_str(
339 "# Scanned for wikilinks (index-coverage check) but NOT treated as a wiki page.\n",
340 );
341 out.push_str("# Default: \"index.md\". Set to \"\" to disable index coverage.\n");
342 out.push_str(&toml::to_string_pretty(&tbl).unwrap());
343 out
344}
345
/// Render the example `[[directories]]` entries for the annotated wiki.toml.
///
/// Emits one `[[directories]]` table per example path, each preceded by its
/// explanatory comment when one applies. Demonstrates both `autolink = true`
/// (the default) and `autolink = false` (opt-out for poor auto-link patterns).
fn build_directories_section() -> String {
    // A fixed array suffices here; `vec!` would heap-allocate for no benefit
    // (clippy::useless_vec).
    let dirs = [
        (
            "wiki",
            true,
            "# autolink: pages here feed bare-mention auto-linking.\n# When true, filename stems become patterns for `wiki links check`.\n# Default: true\n",
        ),
        (
            "wiki/papers",
            false,
            "# Long, specific names are poor auto-link patterns — disable.\n",
        ),
        ("wiki/topics", false, ""),
    ];

    let mut out = String::new();
    for (path, autolink, comment) in dirs {
        if !comment.is_empty() {
            out.push_str(comment);
        }
        out.push_str("[[directories]]\n");
        out.push_str(&format!("path = \"{path}\"\n"));
        out.push_str(&format!("autolink = {autolink}\n\n"));
    }
    out
}
372
373fn build_linking_section() -> String {
374 let mut out = String::new();
375 out.push_str("[linking]\n");
376
377 out.push_str("# Page names to never auto-link, even in autolink=true directories.\n");
378 out.push_str("# Default: []\n");
379 let exclude = toml::Value::Array(vec![
380 toml::Value::String("the".to_owned()),
381 toml::Value::String("a".to_owned()),
382 toml::Value::String("an".to_owned()),
383 ]);
384 out.push_str(&format!("exclude = {exclude}\n\n"));
385
386 out.push_str("# Frontmatter field that pages can set to false to opt out of auto-linking.\n");
387 out.push_str("# Default: \"autolink\"\n");
388 out.push_str("autolink_field = \"autolink\"\n\n");
389
390 out
391}
392
/// Render the example `[checks]` table for the annotated wiki.toml.
///
/// Covers the three wiki-wide checks (broken links, orphan pages, index
/// coverage), each documented with its default severity.
fn build_checks_section() -> String {
    // Static text only: join the lines in one pass instead of growing a
    // String through repeated push_str calls.
    [
        "[checks]\n",
        "# Every [[wikilink]] must resolve to an existing page.\n",
        "# Fragment references ([[page#heading]], [[page#^block]]) are also validated.\n",
        "# Default: \"error\"\n",
        "broken_links = \"error\"\n\n",
        "# Every wiki page must have at least one inbound [[wikilink]] from another page.\n",
        "# Default: \"error\"\n",
        "orphan_pages = \"error\"\n\n",
        "# Every wiki page must be referenced via [[wikilink]] in the index file.\n",
        "# Only active if `index` is set and the file exists.\n",
        "# Default: \"error\"\n",
        "index_coverage = \"error\"\n\n",
    ]
    .concat()
}
415
/// Render the example `[[rules]]` entries for the annotated wiki.toml.
///
/// Produces four rule families in order: required sections, required
/// frontmatter, mirror parity, and citation patterns (regex-based and
/// preset-based). Array-valued fields are rendered via `toml_array` so the
/// emitted syntax is valid TOML.
fn build_rules_section() -> String {
    let mut out = String::new();

    // Rules requiring specific `##` headings in scoped directories.
    out.push_str("# --- Required sections ---\n");
    out.push_str("# Pages in the specified directories must contain these ## headings.\n\n");

    out.push_str("[[rules]]\ncheck = \"required-sections\"\n");
    out.push_str(&format!("dirs = {}\n", toml_array(&["wiki/concepts"])));
    out.push_str(&format!(
        "sections = {}\n",
        toml_array(&["See also", "Viability check"])
    ));
    out.push_str("severity = \"error\"\n\n");

    out.push_str("[[rules]]\ncheck = \"required-sections\"\n");
    out.push_str(&format!("dirs = {}\n", toml_array(&["wiki/topics"])));
    out.push_str(&format!("sections = {}\n", toml_array(&["See also"])));
    out.push_str("severity = \"warn\"\n\n");

    // Rules requiring specific YAML frontmatter fields.
    out.push_str("# --- Required frontmatter fields ---\n");
    out.push_str(
        "# Pages in the specified directories must have these YAML frontmatter fields.\n\n",
    );

    out.push_str("[[rules]]\ncheck = \"required-frontmatter\"\n");
    out.push_str(&format!(
        "dirs = {}\n",
        toml_array(&["wiki/concepts", "wiki/topics"])
    ));
    out.push_str(&format!(
        "fields = {}\n",
        toml_array(&["title", "tags", "date"])
    ));
    out.push_str("severity = \"error\"\n\n");

    out.push_str("[[rules]]\ncheck = \"required-frontmatter\"\n");
    out.push_str(&format!("dirs = {}\n", toml_array(&["wiki/papers"])));
    out.push_str(&format!(
        "fields = {}\n",
        toml_array(&["title", "tags", "date", "sources"])
    ));
    out.push_str("severity = \"error\"\n\n");

    // Rule pairing two directories by filename stem.
    out.push_str("# --- Mirror parity ---\n");
    out.push_str(
        "# Two directories must contain matching filenames (by stem, ignoring extension).\n",
    );
    out.push_str("# Useful for raw-source / processed-page pairs.\n");
    out.push_str("# Note: `right` does NOT need to be a declared [[directories]] entry.\n\n");

    out.push_str("[[rules]]\ncheck = \"mirror-parity\"\n");
    out.push_str("left = \"wiki/papers\"\nright = \"raw/papers\"\n");
    out.push_str("severity = \"error\"\n\n");

    // Regex- and preset-based citation detection rules.
    out.push_str("# --- Citation patterns ---\n");
    out.push_str("# Detect references in prose that should have corresponding wiki pages.\n");
    out.push_str("#\n");
    out.push_str("# Each pattern has a regex with a named capture group `id`.\n");
    out.push_str("# `match_in`: which directory to search for matching pages.\n");
    out.push_str("# `match_mode`:\n");
    out.push_str(
        "#   \"content\"  - search page file contents for the captured ID string (default)\n",
    );
    out.push_str("#   \"filename\" - check if a page with the captured ID as filename exists\n");
    out.push_str("#\n");
    out.push_str("# Use `preset` instead of `pattern` for built-in patterns:\n");
    out.push_str(
        "#   \"bold-method-year\" - matches **MethodName** (Author, YEAR), checks filenames\n\n",
    );

    out.push_str("[[rules]]\ncheck = \"citation-pattern\"\nname = \"arxiv\"\n");
    out.push_str(&format!(
        "dirs = {}\n",
        toml_array(&["wiki/concepts", "wiki/topics"])
    ));
    out.push_str("pattern = 'arxiv\\.org/abs/(?P<id>\\d{4}\\.\\d{4,5})'\n");
    out.push_str("match_in = \"wiki/papers\"\nmatch_mode = \"content\"\nseverity = \"warn\"\n\n");

    out.push_str("# Preset-based: no regex needed, preset bundles pattern + match_mode.\n");
    out.push_str("[[rules]]\ncheck = \"citation-pattern\"\nname = \"bold-method\"\n");
    out.push_str("preset = \"bold-method-year\"\n");
    out.push_str(&format!(
        "dirs = {}\n",
        toml_array(&["wiki/concepts", "wiki/topics"])
    ));
    out.push_str("match_in = \"wiki/papers\"\nseverity = \"warn\"\n\n");

    out.push_str("[[rules]]\ncheck = \"citation-pattern\"\nname = \"doi\"\n");
    out.push_str(&format!("dirs = {}\n", toml_array(&["wiki"])));
    out.push_str("pattern = 'doi\\.org/(?P<id>10\\.\\d{4,}/[^\\s)]+)'\n");
    out.push_str("match_in = \"wiki/papers\"\nmatch_mode = \"content\"\nseverity = \"warn\"\n");

    out
}