1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
2use std::fmt::Write as _;
3use std::path::{Path, PathBuf};
4
5use gobby_core::ai::{daemon::generate_via_daemon, effective_route, text::generate_text};
6use gobby_core::ai_context::{AiConfigSource, AiContext, AiContextOptions, PostgresAiConfigSource};
7use gobby_core::config::{AiCapability, AiRouting};
8use serde::{Deserialize, Serialize};
9
10use crate::commands::scope;
11use crate::config::{self, Context};
12use crate::db;
13use crate::falkor;
14use crate::index::hasher;
15use crate::models::Symbol;
16use crate::output::{self, Format};
17use crate::secrets;
18use crate::visibility;
19
/// Output directory used by `run` when the caller does not supply one.
const DEFAULT_OUT_DIR: &str = "codewiki";
/// Path of the incremental-run metadata file, relative to the output directory.
const CODEWIKI_META_PATH: &str = "_meta/codewiki.json";
/// Hop limit handed to `bounded_module_dependency_edges` when rendering module dependency diagrams.
const MAX_MERMAID_HOPS: usize = 2;
/// Edge budget for Mermaid diagrams.
/// NOTE(review): not referenced in the visible part of this file — presumably caps edges per diagram; confirm.
const MAX_MERMAID_EDGES: usize = 20;
24
25mod prompts {
26 use std::fmt::Write as _;
27
28 use crate::models::Symbol;
29
30 pub const SYMBOL_SYSTEM: &str = "You write concise API reference notes. Return one sentence describing the symbol's purpose. Do not include markdown fences.";
31 pub const FILE_SYSTEM: &str = "You write concise file-level code documentation. Return a short purpose summary that reuses the supplied symbol summaries. Do not include markdown fences.";
32 pub const MODULE_SYSTEM: &str = "You write concise module overviews for code documentation. Return a short overview from the supplied child summaries. Do not include markdown fences.";
33 pub const REPO_SYSTEM: &str = "You write concise repository overviews for code documentation. Return a short overview from the supplied module summaries. Do not include markdown fences.";
34
35 pub fn symbol_prompt(symbol: &Symbol) -> String {
36 let mut prompt = format!(
37 "File: {}\nSymbol: {} [{}]\nLines: {}-{}",
38 symbol.file_path,
39 symbol.qualified_name,
40 symbol.kind,
41 symbol.line_start,
42 symbol.line_end
43 );
44 if let Some(signature) = symbol
45 .signature
46 .as_deref()
47 .filter(|value| !value.is_empty())
48 {
49 let _ = write!(prompt, "\nSignature: {signature}");
50 }
51 if let Some(docstring) = symbol
52 .docstring
53 .as_deref()
54 .filter(|value| !value.is_empty())
55 {
56 let _ = write!(prompt, "\nExisting docs: {docstring}");
57 }
58 prompt
59 }
60
61 pub fn file_prompt(file: &str, symbols: &[SymbolSummary]) -> String {
62 let mut prompt =
63 format!("Summarize this file once from its AST symbols.\n\nFile: {file}\n\nSymbols:\n");
64 if symbols.is_empty() {
65 prompt.push_str("- No indexed symbols.\n");
66 } else {
67 for symbol in symbols {
68 let _ = writeln!(
69 prompt,
70 "- {} [{}] component {} ({}) lines {}-{}: {}",
71 symbol.name,
72 symbol.kind,
73 symbol.component_label,
74 symbol.component_id,
75 symbol.line_start,
76 symbol.line_end,
77 symbol.purpose
78 );
79 }
80 }
81 prompt
82 }
83
84 pub fn module_prompt(
85 module: &str,
86 files: &[ChildSummary],
87 modules: &[ChildSummary],
88 components: &[String],
89 ) -> String {
90 let mut prompt = format!(
91 "Summarize this module once from lower-level summaries.\n\nModule: {module}\n\nFiles:\n"
92 );
93 if files.is_empty() {
94 prompt.push_str("- No direct files.\n");
95 } else {
96 for file in files {
97 let _ = writeln!(prompt, "- {}: {}", file.name, file.summary);
98 }
99 }
100 prompt.push_str("\nChild modules:\n");
101 if modules.is_empty() {
102 prompt.push_str("- No child modules.\n");
103 } else {
104 for module in modules {
105 let _ = writeln!(prompt, "- {}: {}", module.name, module.summary);
106 }
107 }
108 prompt.push_str("\nStable component IDs:\n");
109 if components.is_empty() {
110 prompt.push_str("- No indexed components.\n");
111 } else {
112 for component in components {
113 let _ = writeln!(prompt, "- {component}");
114 }
115 }
116 prompt
117 }
118
119 pub fn repo_prompt(modules: &[ChildSummary], files: &[ChildSummary]) -> String {
120 let mut prompt =
121 "Summarize this repository once from module and root-file summaries.\n\nModules:\n"
122 .to_string();
123 if modules.is_empty() {
124 prompt.push_str("- No modules.\n");
125 } else {
126 for module in modules {
127 let _ = writeln!(prompt, "- {}: {}", module.name, module.summary);
128 }
129 }
130 prompt.push_str("\nRoot files:\n");
131 if files.is_empty() {
132 prompt.push_str("- No root files.\n");
133 } else {
134 for file in files {
135 let _ = writeln!(prompt, "- {}: {}", file.name, file.summary);
136 }
137 }
138 prompt
139 }
140
141 #[derive(Debug, Clone)]
142 pub struct SymbolSummary {
143 pub name: String,
144 pub kind: String,
145 pub component_id: String,
146 pub component_label: String,
147 pub line_start: usize,
148 pub line_end: usize,
149 pub purpose: String,
150 }
151
152 #[derive(Debug, Clone)]
153 pub struct ChildSummary {
154 pub name: String,
155 pub summary: String,
156 }
157}
158
/// Everything `generate_hierarchical_docs` needs to render a documentation set.
#[derive(Debug, Clone)]
pub struct CodewikiInput {
    /// Candidate file paths; only those accepted by `is_core_file` are documented.
    pub files: Vec<String>,
    /// Call and import edges between component IDs.
    pub graph_edges: Vec<CodewikiGraphEdge>,
    /// Whether the graph lookup that produced `graph_edges` succeeded.
    pub graph_availability: CodewikiGraphAvailability,
    /// Indexed symbols; a symbol's file is documented even if it is absent from `files`.
    pub symbols: Vec<Symbol>,
}
166
/// A directed relationship between two components, identified by component ID.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodewikiGraphEdge {
    /// Component the edge starts from.
    pub source_component_id: String,
    /// Component the edge points to.
    pub target_component_id: String,
    /// Relationship expressed by the edge.
    pub kind: CodewikiGraphEdgeKind,
}

impl CodewikiGraphEdge {
    /// Shared constructor behind the public, kind-specific helpers.
    fn with_kind(source: String, target: String, kind: CodewikiGraphEdgeKind) -> Self {
        Self {
            source_component_id: source,
            target_component_id: target,
            kind,
        }
    }

    /// Creates a [`CodewikiGraphEdgeKind::Call`] edge from `source_component_id`
    /// to `target_component_id`.
    pub fn call(
        source_component_id: impl Into<String>,
        target_component_id: impl Into<String>,
    ) -> Self {
        Self::with_kind(
            source_component_id.into(),
            target_component_id.into(),
            CodewikiGraphEdgeKind::Call,
        )
    }

    /// Creates a [`CodewikiGraphEdgeKind::Import`] edge from `source_component_id`
    /// to `target_component_id`.
    pub fn import(
        source_component_id: impl Into<String>,
        target_component_id: impl Into<String>,
    ) -> Self {
        Self::with_kind(
            source_component_id.into(),
            target_component_id.into(),
            CodewikiGraphEdgeKind::Import,
        )
    }
}

/// The relationship a [`CodewikiGraphEdge`] represents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodewikiGraphEdgeKind {
    /// The source component calls the target component.
    Call,
    /// The source component imports the target component.
    Import,
}
203
/// Result of a graph lookup: the edges found plus whether the lookup succeeded.
#[derive(Debug, Clone)]
struct CodewikiGraph {
    /// Edges collected from the graph; empty when the graph is unavailable.
    edges: Vec<CodewikiGraphEdge>,
    /// Whether the edges come from a successful lookup.
    availability: CodewikiGraphAvailability,
}
209
210impl CodewikiGraph {
211 fn available(edges: Vec<CodewikiGraphEdge>) -> Self {
212 Self {
213 edges,
214 availability: CodewikiGraphAvailability::Available,
215 }
216 }
217
218 fn unavailable() -> Self {
219 Self {
220 edges: Vec::new(),
221 availability: CodewikiGraphAvailability::Unavailable,
222 }
223 }
224}
225
/// Whether graph edges could be obtained for a run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodewikiGraphAvailability {
    /// The graph lookup succeeded (the edge list may still be empty).
    Available,
    /// No graph backend was configured, or connecting to or querying it failed.
    Unavailable,
}
231
/// Intermediate model for one file page, built by `build_file_doc`.
#[derive(Debug, Clone)]
struct FileDoc {
    /// Path of the documented file.
    path: String,
    /// Module the file was assigned to.
    module: String,
    /// File-level summary text.
    summary: String,
    /// One span per documented symbol, in symbol order.
    source_spans: Vec<SourceSpan>,
    /// Per-symbol documentation.
    symbols: Vec<SymbolDoc>,
    /// Component ID of each symbol, in symbol order.
    component_ids: Vec<String>,
}
241
/// Documentation generated for a single symbol.
#[derive(Debug, Clone)]
struct SymbolDoc {
    /// The indexed symbol being documented.
    symbol: Symbol,
    /// Purpose text for the symbol.
    purpose: String,
    /// Identifier returned by `component_id` for the symbol.
    component_id: String,
    /// Label returned by `component_label` for the symbol.
    component_label: String,
    /// Source location of the symbol.
    source_span: SourceSpan,
}
250
/// Intermediate model for one module page, built by `build_module_docs`.
#[derive(Debug, Clone)]
struct ModuleDoc {
    /// Module name (a `/`-separated path).
    module: String,
    /// Module-level summary text.
    summary: String,
    /// Spans collected from the module's direct files and child modules.
    source_spans: Vec<SourceSpan>,
    /// Files assigned directly to this module.
    direct_files: Vec<FileLink>,
    /// Immediate child modules.
    child_modules: Vec<ModuleLink>,
    /// `label (id)` strings for symbols in this module and its descendants.
    component_ids: Vec<String>,
    /// Mermaid dependency diagram, when one could be rendered.
    dependency_diagram: Option<String>,
    /// Mermaid call diagram, when one could be rendered.
    call_diagram: Option<String>,
    /// Whether graph data was available for the run that produced this page.
    graph_availability: CodewikiGraphAvailability,
}
263
/// Reference from a module page to one of its direct files.
#[derive(Debug, Clone)]
struct FileLink {
    /// Path of the linked file.
    path: String,
    /// Summary copied from the file's `FileDoc`.
    summary: String,
    /// Source spans copied from the file's `FileDoc`.
    source_spans: Vec<SourceSpan>,
}
270
/// Reference from a module page to one of its child modules.
#[derive(Debug, Clone)]
struct ModuleLink {
    /// Name of the child module.
    module: String,
    /// Summary already produced for the child module.
    summary: String,
    /// Source spans already collected for the child module.
    source_spans: Vec<SourceSpan>,
}
277
/// A line range inside a source file.
///
/// The derived ordering compares `file`, then `line_start`, then `line_end`,
/// so the field order here is significant.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
struct SourceSpan {
    /// File the span belongs to.
    file: String,
    /// First line of the span.
    line_start: usize,
    /// Last line of the span.
    line_end: usize,
}
284
/// Machine-readable report of one `run` invocation (printed as JSON for `Format::Json`).
#[derive(Debug, Clone, Serialize)]
pub struct CodewikiRunSummary {
    /// Name of the command that produced the summary.
    pub command: &'static str,
    /// Identifier of the project that was documented.
    pub project_id: String,
    /// Project root, rendered for display.
    pub project_root: String,
    /// Directory the documentation was written to.
    pub out_dir: String,
    /// Total number of pages in the generated documentation set.
    pub generated_pages: usize,
    /// Relative paths of the pages that were actually (re)written.
    pub changed_paths: Vec<String>,
    /// Pages left untouched: `generated_pages` minus `changed_paths.len()`.
    pub skipped: usize,
    /// Number of generated pages under `files/`.
    pub files: usize,
    /// Number of generated pages under `modules/`.
    pub modules: usize,
    /// Number of input symbols that belong to core files.
    pub symbols: usize,
    /// Whether a text generator was resolved for the run.
    pub ai_enabled: bool,
}
299
/// Metadata persisted at `CODEWIKI_META_PATH` to support incremental runs.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
struct CodewikiMeta {
    /// Per-page metadata keyed by the page's path relative to the output directory.
    docs: BTreeMap<String, CodewikiDocMeta>,
    /// Relative paths of the pages rewritten by the run that saved this metadata.
    generated_docs: Vec<String>,
}
305
/// Change-detection fingerprint for one generated page.
#[derive(Debug, Clone, Default, Deserialize, Eq, PartialEq, Serialize)]
struct CodewikiDocMeta {
    /// Content hash of each source file cited in the page's frontmatter, keyed by file path.
    source_hashes: BTreeMap<String, String>,
}
310
/// Callback used to produce generated text; `None` means no text was produced
/// and callers fall back to a structural description.
/// NOTE(review): the two arguments appear to be (user prompt, system prompt) — confirm against `maybe_generate`.
pub type TextGenerator<'a> = dyn FnMut(&str, &str) -> Option<String> + 'a;
312
313pub fn run(
314 ctx: &Context,
315 out: Option<String>,
316 scope_args: Vec<String>,
317 ai: Option<AiRouting>,
318 format: Format,
319) -> anyhow::Result<()> {
320 let mut conn = db::connect_readonly(&ctx.database_url)?;
321 let scopes = scope_args
322 .iter()
323 .map(|value| scope::normalize_file_arg(ctx, value))
324 .collect::<Vec<_>>();
325 let files = visibility::visible_tree(&mut conn, ctx)?
326 .into_iter()
327 .map(|file| file.file_path)
328 .filter(|file| in_scope(file, &scopes))
329 .collect::<Vec<_>>();
330 let mut symbols = Vec::new();
331 for file in &files {
332 symbols.extend(visibility::visible_symbols_for_file(&mut conn, ctx, file)?);
333 }
334
335 let graph = fetch_codewiki_graph_edges(ctx, &files, &symbols)?;
336 let input = CodewikiInput {
337 files,
338 graph_edges: graph.edges,
339 graph_availability: graph.availability,
340 symbols,
341 };
342 let mut generator = resolve_text_generator(ctx, ai);
343 let ai_enabled = generator.is_some();
344 let docs = match generator.as_deref_mut() {
345 Some(generate) => generate_hierarchical_docs(&input, Some(generate)),
346 None => generate_hierarchical_docs(&input, None),
347 };
348 let module_count = docs
349 .iter()
350 .filter(|(path, _)| path.starts_with("modules/"))
351 .count();
352 let file_count = docs
353 .iter()
354 .filter(|(path, _)| path.starts_with("files/"))
355 .count();
356 let symbol_count = input
357 .symbols
358 .iter()
359 .filter(|symbol| is_core_file(&symbol.file_path))
360 .count();
361 let out_dir = out.unwrap_or_else(|| DEFAULT_OUT_DIR.to_string());
362 let changed_paths = write_incremental_doc_set(&ctx.project_root, Path::new(&out_dir), &docs)?;
363 let generated_pages = docs.len();
364 let skipped = generated_pages.saturating_sub(changed_paths.len());
365
366 let summary = CodewikiRunSummary {
367 command: "codewiki",
368 project_id: ctx.project_id.clone(),
369 project_root: ctx.project_root.display().to_string(),
370 out_dir,
371 generated_pages,
372 changed_paths,
373 skipped,
374 files: file_count,
375 modules: module_count,
376 symbols: symbol_count,
377 ai_enabled,
378 };
379 match format {
380 Format::Json => output::print_json(&summary),
381 Format::Text => output::print_text(&format!(
382 "wrote {} file docs, {} module docs, and repo.md to {}",
383 summary.files, summary.modules, summary.out_dir
384 )),
385 }
386}
387
/// Renders the full documentation set for `input`.
///
/// Returns `(relative path, page content)` pairs. `generate`, when supplied, is
/// used to produce summary text; this function simply forwards to
/// `generate_hierarchical_docs_with_graph_availability`.
pub fn generate_hierarchical_docs(
    input: &CodewikiInput,
    generate: Option<&mut TextGenerator<'_>>,
) -> Vec<(String, String)> {
    generate_hierarchical_docs_with_graph_availability(input, generate)
}
394
395fn generate_hierarchical_docs_with_graph_availability(
396 input: &CodewikiInput,
397 mut generate: Option<&mut TextGenerator<'_>>,
398) -> Vec<(String, String)> {
399 let mut files = input
400 .files
401 .iter()
402 .filter(|file| is_core_file(file))
403 .cloned()
404 .collect::<BTreeSet<_>>();
405 for symbol in &input.symbols {
406 if is_core_file(&symbol.file_path) {
407 files.insert(symbol.file_path.clone());
408 }
409 }
410 let files = files.into_iter().collect::<Vec<_>>();
411
412 let mut symbols_by_file: BTreeMap<String, Vec<Symbol>> = BTreeMap::new();
413 for symbol in &input.symbols {
414 if !is_core_file(&symbol.file_path) {
415 continue;
416 }
417 symbols_by_file
418 .entry(symbol.file_path.clone())
419 .or_default()
420 .push(symbol.clone());
421 }
422 for symbols in symbols_by_file.values_mut() {
423 symbols.sort_by_key(|symbol| (symbol.line_start, symbol.byte_start, symbol.name.clone()));
424 }
425
426 let file_modules = cluster_file_modules(&files, &symbols_by_file, &input.graph_edges);
427 let file_docs = files
428 .iter()
429 .map(|file| {
430 build_file_doc(
431 file,
432 file_modules
433 .get(file)
434 .cloned()
435 .unwrap_or_else(|| module_for_file(file)),
436 symbols_by_file.remove(file).unwrap_or_default(),
437 &mut generate,
438 )
439 })
440 .collect::<Vec<_>>();
441 let module_docs = build_module_docs(
442 &file_docs,
443 &input.graph_edges,
444 input.graph_availability,
445 &mut generate,
446 );
447 let repo_doc = build_repo_doc(&file_docs, &module_docs, &mut generate);
448
449 let mut docs = Vec::new();
450 docs.push(("repo.md".to_string(), repo_doc));
451 for module in &module_docs {
452 docs.push((module_doc_path(&module.module), render_module_doc(module)));
453 }
454 for file in &file_docs {
455 docs.push((file_doc_path(&file.path), render_file_doc(file)));
456 }
457 docs
458}
459
460pub fn write_doc_set(out_dir: &Path, docs: &[(String, String)]) -> anyhow::Result<()> {
461 std::fs::create_dir_all(out_dir)?;
462 for (relative_path, content) in docs {
463 write_doc(out_dir, relative_path, content)?;
464 }
465 Ok(())
466}
467
468pub fn write_incremental_doc_set(
469 project_root: &Path,
470 out_dir: &Path,
471 docs: &[(String, String)],
472) -> anyhow::Result<Vec<String>> {
473 std::fs::create_dir_all(out_dir)?;
474 let previous = read_codewiki_meta(out_dir)?;
475 let mut next_docs = BTreeMap::new();
476 let mut generated_docs = Vec::new();
477
478 for (relative_path, content) in docs {
479 let doc_meta = CodewikiDocMeta {
480 source_hashes: source_hashes_for_doc(project_root, content)?,
481 };
482 let target = safe_doc_path(out_dir, relative_path)?;
483 let unchanged = target.exists()
484 && previous
485 .docs
486 .get(relative_path)
487 .is_some_and(|previous_meta| previous_meta == &doc_meta);
488
489 if !unchanged {
490 write_doc(out_dir, relative_path, content)?;
491 generated_docs.push(relative_path.clone());
492 }
493 next_docs.insert(relative_path.clone(), doc_meta);
494 }
495
496 for stale_path in previous
497 .docs
498 .keys()
499 .filter(|key| !next_docs.contains_key(*key))
500 {
501 let target = safe_doc_path(out_dir, stale_path)?;
502 reject_symlinked_doc_path(out_dir, &target)?;
503 match std::fs::remove_file(&target) {
504 Ok(()) => prune_empty_doc_dirs(out_dir, &target)?,
505 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
506 Err(err) => return Err(err.into()),
507 }
508 }
509
510 let meta = CodewikiMeta {
511 docs: next_docs,
512 generated_docs: generated_docs.clone(),
513 };
514 write_codewiki_meta(out_dir, &meta)?;
515 Ok(generated_docs)
516}
517
518fn write_doc(out_dir: &Path, relative_path: &str, content: &str) -> anyhow::Result<()> {
519 let target = safe_doc_path(out_dir, relative_path)?;
520 reject_symlinked_doc_path(out_dir, &target)?;
521 if let Some(parent) = target.parent() {
522 std::fs::create_dir_all(parent)?;
523 }
524 std::fs::write(target, content)?;
525 Ok(())
526}
527
528fn reject_symlinked_doc_path(out_dir: &Path, target: &Path) -> anyhow::Result<()> {
529 let relative = target.strip_prefix(out_dir)?;
530 let mut current = out_dir.to_path_buf();
531 for component in relative.components() {
532 current.push(component);
533 match std::fs::symlink_metadata(¤t) {
534 Ok(metadata) if metadata.file_type().is_symlink() => {
535 anyhow::bail!(
536 "refusing to follow symlinked codewiki path: {}",
537 current.display()
538 );
539 }
540 Ok(_) => {}
541 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
542 Err(err) => return Err(err.into()),
543 }
544 }
545 Ok(())
546}
547
548fn prune_empty_doc_dirs(out_dir: &Path, target: &Path) -> anyhow::Result<()> {
549 let mut current = target.parent();
550 while let Some(dir) = current {
551 if dir == out_dir {
552 break;
553 }
554 match std::fs::remove_dir(dir) {
555 Ok(()) => current = dir.parent(),
556 Err(err)
557 if matches!(
558 err.kind(),
559 std::io::ErrorKind::NotFound | std::io::ErrorKind::DirectoryNotEmpty
560 ) =>
561 {
562 break;
563 }
564 Err(err) => return Err(err.into()),
565 }
566 }
567 Ok(())
568}
569
570fn read_codewiki_meta(out_dir: &Path) -> anyhow::Result<CodewikiMeta> {
571 let path = safe_doc_path(out_dir, CODEWIKI_META_PATH)?;
572 match std::fs::read_to_string(&path) {
573 Ok(raw) => Ok(serde_json::from_str(&raw)?),
574 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(CodewikiMeta::default()),
575 Err(err) => Err(err.into()),
576 }
577}
578
579fn write_codewiki_meta(out_dir: &Path, meta: &CodewikiMeta) -> anyhow::Result<()> {
580 let content = serde_json::to_string_pretty(meta)?;
581 write_doc(out_dir, CODEWIKI_META_PATH, &(content + "\n"))
582}
583
584fn source_hashes_for_doc(
585 project_root: &Path,
586 content: &str,
587) -> anyhow::Result<BTreeMap<String, String>> {
588 let mut hashes = BTreeMap::new();
589 for file in source_files_from_frontmatter(content) {
590 let hash = hasher::file_content_hash(&project_root.join(&file))
591 .map_err(|err| anyhow::anyhow!("failed to hash codewiki source file {file}: {err}"))?;
592 hashes.insert(file, hash);
593 }
594 Ok(hashes)
595}
596
597fn source_files_from_frontmatter(content: &str) -> BTreeSet<String> {
598 let mut files = BTreeSet::new();
599 let mut in_frontmatter = false;
600 for line in content.lines() {
601 if line == "---" {
602 if in_frontmatter {
603 break;
604 }
605 in_frontmatter = true;
606 continue;
607 }
608 if !in_frontmatter {
609 continue;
610 }
611 if let Some(file) = line
612 .strip_prefix(" - file: ")
613 .and_then(unquote_yaml_string)
614 {
615 files.insert(file);
616 }
617 }
618 files
619}
620
/// Decodes a double-quoted scalar.
///
/// Surrounding whitespace is trimmed, the enclosing double quotes are removed,
/// and each backslash makes the following character literal. Returns `None`
/// when the value is not wrapped in double quotes or ends with a dangling
/// backslash.
fn unquote_yaml_string(value: &str) -> Option<String> {
    let body = value.trim().strip_prefix('"')?.strip_suffix('"')?;

    let mut unescaped = String::new();
    let mut pending_escape = false;
    for ch in body.chars() {
        if pending_escape {
            // The character after a backslash is taken verbatim.
            unescaped.push(ch);
            pending_escape = false;
        } else if ch == '\\' {
            pending_escape = true;
        } else {
            unescaped.push(ch);
        }
    }

    if pending_escape {
        None
    } else {
        Some(unescaped)
    }
}
635
/// Collects call and import edges between components from the graph backend.
///
/// Returns an *available* graph with no edges when there are no core-file
/// symbols. Returns an *unavailable* graph when no backend is configured, when
/// the client cannot be created, or when either query fails; failures are
/// reported on stderr unless `ctx.quiet` is set. Otherwise returns the call
/// edges followed by the import edges.
fn fetch_codewiki_graph_edges(
    ctx: &Context,
    files: &[String],
    symbols: &[Symbol],
) -> anyhow::Result<CodewikiGraph> {
    // Map each core-file symbol ID to its component ID.
    let symbol_components = symbols
        .iter()
        .filter(|symbol| is_core_file(&symbol.file_path))
        .map(|symbol| (symbol.id.clone(), component_id(symbol)))
        .collect::<HashMap<_, _>>();
    if symbol_components.is_empty() {
        return Ok(CodewikiGraph::available(Vec::new()));
    }

    let Some(config) = &ctx.falkordb else {
        return Ok(CodewikiGraph::unavailable());
    };

    let mut client = match falkor::FalkorClient::from_config(config) {
        Ok(client) => client,
        Err(e) => {
            if !ctx.quiet {
                eprintln!("Warning: FalkorDB connection failed: {e}");
            }
            return Ok(CodewikiGraph::unavailable());
        }
    };

    // Runs a query and turns a failure into `None` (with an optional warning)
    // so callers can downgrade to an unavailable graph.
    fn query_or_unavailable(
        ctx: &Context,
        client: &mut falkor::FalkorClient,
        query: &str,
        params: HashMap<String, String>,
    ) -> Option<Vec<falkor::Row>> {
        match client.query(query, Some(params)) {
            Ok(rows) => Some(rows),
            Err(e) => {
                if !ctx.quiet {
                    eprintln!("Warning: FalkorDB query failed: {e}");
                }
                None
            }
        }
    }

    let symbol_ids = symbol_components.keys().cloned().collect::<Vec<_>>();
    let core_files = files
        .iter()
        .filter(|file| is_core_file(file))
        .cloned()
        .collect::<Vec<_>>();

    let mut edges = Vec::new();
    let (query, params) = codewiki_call_edges_query(&ctx.project_id, &symbol_ids);
    let Some(rows) = query_or_unavailable(ctx, &mut client, &query, params) else {
        return Ok(CodewikiGraph::unavailable());
    };
    // Call edges: rows with missing columns or unknown symbol IDs are skipped.
    for row in rows {
        let Some(source) = row.get("source").and_then(|value| value.as_str()) else {
            continue;
        };
        let Some(target) = row.get("target").and_then(|value| value.as_str()) else {
            continue;
        };
        let Some(source_component_id) = symbol_components.get(source).cloned() else {
            continue;
        };
        let Some(target_component_id) = symbol_components.get(target).cloned() else {
            continue;
        };
        edges.push(CodewikiGraphEdge::call(
            source_component_id,
            target_component_id,
        ));
    }

    if !core_files.is_empty() {
        let file_symbols = symbols_by_file_component(symbols);
        let (query, params) = codewiki_import_edges_query(&ctx.project_id, &core_files);
        let Some(rows) = query_or_unavailable(ctx, &mut client, &query, params) else {
            return Ok(CodewikiGraph::unavailable());
        };
        // Import edges: link the first component of the importing file to the
        // first component of every core file matched by the imported module.
        for row in rows {
            let Some(source_file) = row.get("source").and_then(|value| value.as_str()) else {
                continue;
            };
            let Some(target_module) = row.get("target").and_then(|value| value.as_str()) else {
                continue;
            };
            let Some(source_component_id) = first_component_for_file(&file_symbols, source_file)
            else {
                continue;
            };
            for target_file in files_for_import_target(&core_files, target_module) {
                let Some(target_component_id) =
                    first_component_for_file(&file_symbols, target_file)
                else {
                    continue;
                };
                edges.push(CodewikiGraphEdge::import(
                    source_component_id.clone(),
                    target_component_id,
                ));
            }
        }
    }

    Ok(CodewikiGraph::available(edges))
}
745
746fn codewiki_call_edges_query(
747 project_id: &str,
748 symbol_ids: &[String],
749) -> (String, HashMap<String, String>) {
750 (
751 format!(
752 "MATCH (source:CodeSymbol {{project: $project}})-[:CALLS]->(target:CodeSymbol {{project: $project}}) \
753 WHERE source.id IN [{}] AND target.id IN [{}] \
754 RETURN source.id AS source, target.id AS target \
755 LIMIT 5000",
756 falkor::id_list_literal(symbol_ids),
757 falkor::id_list_literal(symbol_ids)
758 ),
759 HashMap::from([(
760 "project".to_string(),
761 falkor::cypher_string_literal(project_id),
762 )]),
763 )
764}
765
766fn codewiki_import_edges_query(
767 project_id: &str,
768 files: &[String],
769) -> (String, HashMap<String, String>) {
770 (
771 format!(
772 "MATCH (source:CodeFile {{project: $project}})-[:IMPORTS]->(target:CodeModule {{project: $project}}) \
773 WHERE source.path IN [{}] \
774 RETURN source.path AS source, target.name AS target \
775 LIMIT 5000",
776 falkor::id_list_literal(files)
777 ),
778 HashMap::from([(
779 "project".to_string(),
780 falkor::cypher_string_literal(project_id),
781 )]),
782 )
783}
784
785fn cluster_file_modules(
786 files: &[String],
787 symbols_by_file: &BTreeMap<String, Vec<Symbol>>,
788 graph_edges: &[CodewikiGraphEdge],
789) -> HashMap<String, String> {
790 let mut components_to_file = HashMap::new();
791 for (file, symbols) in symbols_by_file {
792 for symbol in symbols {
793 components_to_file.insert(component_id(symbol), file.clone());
794 }
795 }
796
797 let mut parents = files
798 .iter()
799 .map(|file| (file.clone(), file.clone()))
800 .collect::<HashMap<_, _>>();
801 for edge in graph_edges
802 .iter()
803 .filter(|edge| edge.kind == CodewikiGraphEdgeKind::Call)
804 {
805 let Some(source_file) = components_to_file.get(&edge.source_component_id) else {
806 continue;
807 };
808 let Some(target_file) = components_to_file.get(&edge.target_component_id) else {
809 continue;
810 };
811 union_files(&mut parents, source_file, target_file);
812 }
813
814 let mut grouped: BTreeMap<String, Vec<String>> = BTreeMap::new();
815 for file in files {
816 let root = find_file_root(&mut parents, file);
817 grouped.entry(root).or_default().push(file.clone());
818 }
819
820 let mut modules = HashMap::new();
821 for grouped_files in grouped.values() {
822 let module = if grouped_files.len() > 1 {
823 common_module_for_files(grouped_files)
824 } else {
825 module_for_file(&grouped_files[0])
826 };
827 for file in grouped_files {
828 modules.insert(file.clone(), module.clone());
829 }
830 }
831 modules
832}
833
834fn union_files(parents: &mut HashMap<String, String>, left: &str, right: &str) {
835 let left_root = find_file_root(parents, left);
836 let right_root = find_file_root(parents, right);
837 if left_root != right_root {
838 let (parent, child) = if left_root <= right_root {
839 (left_root, right_root)
840 } else {
841 (right_root, left_root)
842 };
843 parents.insert(child, parent);
844 }
845}
846
/// Returns the root of the group containing `file`, compressing the visited path.
///
/// `parents` maps each file to its parent; a file that maps to itself, or that
/// has no entry, is a root. Every file visited on the way is re-pointed directly
/// at the root. If the parent chain loops back on itself, the lexicographically
/// smallest file on the visited path is chosen as root, so the call always
/// terminates.
fn find_file_root(parents: &mut HashMap<String, String>, file: &str) -> String {
    let mut current = file.to_string();
    let mut path = Vec::new();
    let mut seen = HashSet::new();

    let root = loop {
        if !seen.insert(current.clone()) {
            // Cycle: `current` was visited before. Break it by electing the
            // smallest visited file as the root.
            let root = path
                .iter()
                .chain(std::iter::once(&current))
                .min()
                .cloned()
                .unwrap_or_else(|| current.clone());
            parents.insert(current, root.clone());
            break root;
        }

        let parent = parents
            .get(&current)
            .cloned()
            .unwrap_or_else(|| current.clone());
        if parent == current {
            break parent;
        }

        path.push(current);
        current = parent;
    };

    // Path compression: point every visited file straight at the root.
    for node in path {
        parents.insert(node, root.clone());
    }
    root
}
881
882fn common_module_for_files(files: &[String]) -> String {
883 let mut common = module_for_file(&files[0])
884 .split('/')
885 .filter(|part| !part.is_empty())
886 .map(str::to_string)
887 .collect::<Vec<_>>();
888 for file in &files[1..] {
889 let parts = module_for_file(file)
890 .split('/')
891 .filter(|part| !part.is_empty())
892 .map(str::to_string)
893 .collect::<Vec<_>>();
894 let keep = common
895 .iter()
896 .zip(parts.iter())
897 .take_while(|(left, right)| left == right)
898 .count();
899 common.truncate(keep);
900 }
901 common.join("/")
902}
903
904fn symbols_by_file_component(symbols: &[Symbol]) -> BTreeMap<String, Vec<String>> {
905 let mut out: BTreeMap<String, Vec<String>> = BTreeMap::new();
906 for symbol in symbols {
907 if is_core_file(&symbol.file_path) {
908 out.entry(symbol.file_path.clone())
909 .or_default()
910 .push(component_id(symbol));
911 }
912 }
913 out
914}
915
/// Returns the first component ID recorded for `file`, or `None` when the file
/// is unknown or has no components.
fn first_component_for_file(
    symbols_by_file: &BTreeMap<String, Vec<String>>,
    file: &str,
) -> Option<String> {
    let components = symbols_by_file.get(file)?;
    components.first().cloned()
}
925
/// Returns the files that live inside the directory named by an import target.
///
/// `::` and `.` separators in `target_module` are converted to `/`. A file
/// matches when its path starts with `<target>/` or contains `/<target>/`.
/// Matches are returned in the order they appear in `files`.
fn files_for_import_target<'a>(files: &'a [String], target_module: &str) -> Vec<&'a str> {
    let target = target_module.replace("::", "/").replace('.', "/");
    // Build both patterns once instead of once per candidate file.
    let leading = format!("{target}/");
    let nested = format!("/{target}/");
    files
        .iter()
        .map(String::as_str)
        .filter(|file| file.starts_with(&leading) || file.contains(&nested))
        .collect()
}
936
/// Builds the documentation model for one file.
///
/// A purpose is produced for every symbol first (in the order given), then a
/// single file-level summary is produced from those purposes. Whenever the
/// generator yields nothing, a structural description is used instead. Both
/// kinds of text are passed through `ground_text` together with the relevant
/// source spans.
fn build_file_doc(
    file: &str,
    module: String,
    symbols: Vec<Symbol>,
    generate: &mut Option<&mut TextGenerator<'_>>,
) -> FileDoc {
    let symbol_docs = symbols
        .into_iter()
        .map(|symbol| {
            // The structural fallback is used when no text is generated.
            let fallback = structural_symbol_purpose(&symbol);
            let generated = maybe_generate(
                generate,
                &prompts::symbol_prompt(&symbol),
                prompts::SYMBOL_SYSTEM,
            )
            .unwrap_or(fallback);
            let component_id = component_id(&symbol);
            let component_label = component_label(&symbol);
            let source_span = SourceSpan::from_symbol(&symbol);
            let purpose = ground_text(
                &generated,
                std::slice::from_ref(&source_span),
                &source_span.citation(),
            );
            SymbolDoc {
                symbol,
                purpose,
                component_id,
                component_label,
                source_span,
            }
        })
        .collect::<Vec<_>>();
    let source_spans = symbol_docs
        .iter()
        .map(|symbol| symbol.source_span.clone())
        .collect::<Vec<_>>();
    // Compact view of each symbol for the file-level prompt.
    let prompt_symbols = symbol_docs
        .iter()
        .map(|symbol| prompts::SymbolSummary {
            name: symbol.symbol.qualified_name.clone(),
            kind: symbol.symbol.kind.clone(),
            component_id: symbol.component_id.clone(),
            component_label: symbol.component_label.clone(),
            line_start: symbol.symbol.line_start,
            line_end: symbol.symbol.line_end,
            purpose: symbol.purpose.clone(),
        })
        .collect::<Vec<_>>();
    let component_ids = symbol_docs
        .iter()
        .map(|symbol| symbol.component_id.clone())
        .collect::<Vec<_>>();
    let fallback = structural_file_summary(file, &symbol_docs);
    let generated = maybe_generate(
        generate,
        &prompts::file_prompt(file, &prompt_symbols),
        prompts::FILE_SYSTEM,
    )
    .unwrap_or(fallback);
    let summary = ground_text(&generated, &source_spans, &citation_list(&source_spans));

    FileDoc {
        path: file.to_string(),
        module,
        summary,
        source_spans,
        symbols: symbol_docs,
        component_ids,
    }
}
1008
/// Builds the documentation model for every module that contains a file,
/// including all ancestor modules.
///
/// Modules are processed deepest-first so that a parent's prompt can reuse the
/// summaries already produced for its children. The returned list is sorted by
/// module name.
fn build_module_docs(
    files: &[FileDoc],
    graph_edges: &[CodewikiGraphEdge],
    graph_availability: CodewikiGraphAvailability,
    generate: &mut Option<&mut TextGenerator<'_>>,
) -> Vec<ModuleDoc> {
    let mut module_names = BTreeSet::new();
    for file in files {
        for module in module_ancestors(&file.module) {
            module_names.insert(module);
        }
    }

    // Summaries and spans of modules handled so far, consulted when a parent is built.
    let mut module_summaries: BTreeMap<String, String> = BTreeMap::new();
    let mut module_sources: BTreeMap<String, Vec<SourceSpan>> = BTreeMap::new();
    let mut modules = module_names.into_iter().collect::<Vec<_>>();
    // Deepest modules first, so children are summarized before their parents.
    modules.sort_by_key(|module| std::cmp::Reverse(module_depth(module)));

    let mut docs = Vec::new();
    for module in modules {
        let direct_files = files
            .iter()
            .filter(|file| file.module == module)
            .map(|file| FileLink {
                path: file.path.clone(),
                summary: file.summary.clone(),
                source_spans: file.source_spans.clone(),
            })
            .collect::<Vec<_>>();
        let child_modules = direct_child_modules(&module, module_summaries.keys())
            .into_iter()
            .map(|child| ModuleLink {
                summary: module_summaries.get(&child).cloned().unwrap_or_default(),
                source_spans: module_sources.get(&child).cloned().unwrap_or_default(),
                module: child,
            })
            .collect::<Vec<_>>();
        let file_summaries = direct_files
            .iter()
            .map(|file| prompts::ChildSummary {
                name: file.path.clone(),
                summary: file.summary.clone(),
            })
            .collect::<Vec<_>>();
        let child_summaries = child_modules
            .iter()
            .map(|module| prompts::ChildSummary {
                name: module.module.clone(),
                summary: module.summary.clone(),
            })
            .collect::<Vec<_>>();
        // Components of this module and of every module nested beneath it.
        let component_ids = files
            .iter()
            .filter(|file| file.module == module || module_is_ancestor(&module, &file.module))
            .flat_map(|file| {
                file.symbols
                    .iter()
                    .map(|symbol| format!("{} ({})", symbol.component_label, symbol.component_id))
            })
            .collect::<Vec<_>>();
        let dependency_diagram = render_module_dependency_mermaid(&module, files, graph_edges);
        let call_diagram = render_module_call_mermaid(&module, files, graph_edges);
        // The structural fallback is used when no text is generated.
        let fallback = structural_module_summary(&module, &direct_files, &child_modules);
        let source_spans = collect_link_spans(&direct_files, &child_modules);
        let generated = maybe_generate(
            generate,
            &prompts::module_prompt(&module, &file_summaries, &child_summaries, &component_ids),
            prompts::MODULE_SYSTEM,
        )
        .unwrap_or(fallback);
        let summary = ground_text(&generated, &source_spans, &citation_list(&source_spans));

        module_summaries.insert(module.clone(), summary.clone());
        module_sources.insert(module.clone(), source_spans.clone());
        docs.push(ModuleDoc {
            module,
            summary,
            source_spans,
            direct_files,
            child_modules,
            component_ids,
            dependency_diagram,
            call_diagram,
            graph_availability,
        });
    }

    docs.sort_by(|a, b| a.module.cmp(&b.module));
    docs
}
1099
1100fn render_module_dependency_mermaid(
1101 module: &str,
1102 files: &[FileDoc],
1103 graph_edges: &[CodewikiGraphEdge],
1104) -> Option<String> {
1105 let mut component_to_module = HashMap::new();
1106 for file in files {
1107 for component_id in &file.component_ids {
1108 component_to_module.insert(component_id.as_str(), file.module.as_str());
1109 }
1110 }
1111
1112 let all_edges = graph_edges
1113 .iter()
1114 .filter(|edge| edge.kind == CodewikiGraphEdgeKind::Import)
1115 .filter_map(|edge| {
1116 let source = component_to_module.get(edge.source_component_id.as_str())?;
1117 let target = component_to_module.get(edge.target_component_id.as_str())?;
1118 if source == target {
1119 return None;
1120 }
1121 Some(((*source).to_string(), (*target).to_string()))
1122 })
1123 .collect::<BTreeSet<_>>();
1124 if all_edges.is_empty() {
1125 return None;
1126 }
1127
1128 let bounded_edges = bounded_module_dependency_edges(module, &all_edges, MAX_MERMAID_HOPS);
1129 if bounded_edges.is_empty() {
1130 return None;
1131 }
1132
1133 let mut diagram = "```mermaid\ngraph LR\n".to_string();
1134 for (source, target) in bounded_edges {
1135 let _ = writeln!(
1136 diagram,
1137 " {}[\"{}\"] --> {}[\"{}\"]",
1138 mermaid_node_id(&source),
1139 mermaid_label(&source),
1140 mermaid_node_id(&target),
1141 mermaid_label(&target)
1142 );
1143 }
1144 diagram.push_str("```\n");
1145 Some(diagram)
1146}
1147
1148fn render_module_call_mermaid(
1149 module: &str,
1150 files: &[FileDoc],
1151 graph_edges: &[CodewikiGraphEdge],
1152) -> Option<String> {
1153 let component_labels = files
1154 .iter()
1155 .flat_map(|file| {
1156 file.symbols.iter().map(|symbol| {
1157 (
1158 symbol.component_id.as_str(),
1159 symbol.component_label.as_str(),
1160 )
1161 })
1162 })
1163 .collect::<HashMap<_, _>>();
1164 let component_to_module = files
1165 .iter()
1166 .flat_map(|file| {
1167 file.component_ids
1168 .iter()
1169 .map(|component_id| (component_id.as_str(), file.module.as_str()))
1170 })
1171 .collect::<HashMap<_, _>>();
1172 let all_edges = graph_edges
1173 .iter()
1174 .filter(|edge| edge.kind == CodewikiGraphEdgeKind::Call)
1175 .filter_map(|edge| {
1176 let source_module = component_to_module.get(edge.source_component_id.as_str())?;
1177 let target_module = component_to_module.get(edge.target_component_id.as_str())?;
1178 if *source_module != module && *target_module != module {
1179 return None;
1180 }
1181 Some((
1182 edge.source_component_id.clone(),
1183 edge.target_component_id.clone(),
1184 ))
1185 })
1186 .collect::<BTreeSet<_>>();
1187 if all_edges.is_empty() {
1188 return None;
1189 }
1190
1191 let seed_components = files
1192 .iter()
1193 .filter(|file| file.module == module || module_is_ancestor(module, &file.module))
1194 .flat_map(|file| file.component_ids.iter().cloned())
1195 .collect::<BTreeSet<_>>();
1196 let bounded_edges = bounded_component_edges(
1197 &seed_components,
1198 &all_edges,
1199 MAX_MERMAID_HOPS,
1200 MAX_MERMAID_EDGES,
1201 );
1202 if bounded_edges.is_empty() {
1203 return None;
1204 }
1205
1206 let mut participants = BTreeSet::new();
1207 for (source, target) in &bounded_edges {
1208 participants.insert(source.clone());
1209 participants.insert(target.clone());
1210 }
1211
1212 let mut diagram = "```mermaid\nsequenceDiagram\n".to_string();
1213 for component in participants {
1214 let _ = writeln!(
1215 diagram,
1216 " participant {} as {}",
1217 mermaid_node_id(&component),
1218 mermaid_label(
1219 component_labels
1220 .get(component.as_str())
1221 .copied()
1222 .unwrap_or(&component)
1223 )
1224 );
1225 }
1226 for (source, target) in bounded_edges {
1227 let _ = writeln!(
1228 diagram,
1229 " {}->>{}: calls",
1230 mermaid_node_id(&source),
1231 mermaid_node_id(&target)
1232 );
1233 }
1234 diagram.push_str("```\n");
1235 Some(diagram)
1236}
1237
/// Keeps the module dependency edges that lie within `max_hops` of `module`.
///
/// Edges are followed in both directions when measuring distance. An edge is kept only
/// when both of its endpoints were reached.
fn bounded_module_dependency_edges(
    module: &str,
    edges: &BTreeSet<(String, String)>,
    max_hops: usize,
) -> BTreeSet<(String, String)> {
    let reachable = nodes_within_hops(std::iter::once(module), edges, max_hops);
    edges
        .iter()
        .filter(|(source, target)| {
            reachable.contains(source.as_str()) && reachable.contains(target.as_str())
        })
        .cloned()
        .collect()
}

/// Keeps at most `max_edges` component edges that lie within `max_hops` of any seed.
///
/// Edges are followed in both directions when measuring distance. Qualifying edges are
/// taken in the sorted order of `edges`, so the cap drops the lexicographically last ones.
fn bounded_component_edges(
    seed_components: &BTreeSet<String>,
    edges: &BTreeSet<(String, String)>,
    max_hops: usize,
    max_edges: usize,
) -> BTreeSet<(String, String)> {
    let seeds = seed_components.iter().map(String::as_str);
    let reachable = nodes_within_hops(seeds, edges, max_hops);
    edges
        .iter()
        .filter(|(source, target)| {
            reachable.contains(source.as_str()) && reachable.contains(target.as_str())
        })
        .take(max_edges)
        .cloned()
        .collect()
}

/// Breadth-first search over `edges`, treated as undirected.
///
/// Returns every node whose hop distance from some seed is at most `max_hops`; the seeds
/// themselves are always included.
fn nodes_within_hops<'a>(
    seeds: impl IntoIterator<Item = &'a str>,
    edges: &'a BTreeSet<(String, String)>,
    max_hops: usize,
) -> BTreeSet<&'a str> {
    let mut visited = BTreeSet::new();
    let mut queue = VecDeque::new();
    for seed in seeds {
        if visited.insert(seed) {
            queue.push_back((seed, 0usize));
        }
    }

    while let Some((current, distance)) = queue.pop_front() {
        // Nodes already at the hop limit are kept but not expanded further.
        if distance >= max_hops {
            continue;
        }
        for (source, target) in edges {
            for next in dependency_neighbors(current, source, target) {
                if visited.insert(next) {
                    queue.push_back((next, distance + 1));
                }
            }
        }
    }
    visited
}

/// Returns the endpoints of the edge `source -> target` that sit opposite `module`.
///
/// The result is empty when the edge does not touch `module`, and holds both endpoints
/// when the edge is a self-loop on `module`.
fn dependency_neighbors<'a>(module: &str, source: &'a str, target: &'a str) -> Vec<&'a str> {
    let mut neighbors = Vec::with_capacity(2);
    if source == module {
        neighbors.push(target);
    }
    if target == module {
        neighbors.push(source);
    }
    neighbors
}
1318
/// Builds a Mermaid-safe node identifier for `module`.
///
/// The identifier is `m_` followed by the input with every character that is not an
/// ASCII letter or digit replaced by `_`.
fn mermaid_node_id(module: &str) -> String {
    let sanitized = module
        .chars()
        .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' });
    let mut id = String::with_capacity(module.len() + 2);
    id.push_str("m_");
    id.extend(sanitized);
    id
}
1330
/// Produces the display label for a Mermaid node.
///
/// An empty name is shown as `repo`; otherwise every backslash and double quote is
/// prefixed with a backslash.
fn mermaid_label(module: &str) -> String {
    if module.is_empty() {
        return String::from("repo");
    }
    let mut label = String::with_capacity(module.len());
    for ch in module.chars() {
        if matches!(ch, '\\' | '"') {
            label.push('\\');
        }
        label.push(ch);
    }
    label
}
1338
1339fn build_repo_doc(
1340 files: &[FileDoc],
1341 modules: &[ModuleDoc],
1342 generate: &mut Option<&mut TextGenerator<'_>>,
1343) -> String {
1344 let top_modules = modules
1345 .iter()
1346 .filter(|module| parent_module(&module.module).is_none())
1347 .map(|module| ModuleLink {
1348 module: module.module.clone(),
1349 summary: module.summary.clone(),
1350 source_spans: module.source_spans.clone(),
1351 })
1352 .collect::<Vec<_>>();
1353 let root_files = files
1354 .iter()
1355 .filter(|file| file.module.is_empty())
1356 .map(|file| FileLink {
1357 path: file.path.clone(),
1358 summary: file.summary.clone(),
1359 source_spans: file.source_spans.clone(),
1360 })
1361 .collect::<Vec<_>>();
1362 let module_summaries = top_modules
1363 .iter()
1364 .map(|module| prompts::ChildSummary {
1365 name: module.module.clone(),
1366 summary: module.summary.clone(),
1367 })
1368 .collect::<Vec<_>>();
1369 let file_summaries = root_files
1370 .iter()
1371 .map(|file| prompts::ChildSummary {
1372 name: file.path.clone(),
1373 summary: file.summary.clone(),
1374 })
1375 .collect::<Vec<_>>();
1376 let fallback = structural_repo_summary(files.len(), modules.len());
1377 let source_spans = collect_link_spans(&root_files, &top_modules);
1378 let generated = maybe_generate(
1379 generate,
1380 &prompts::repo_prompt(&module_summaries, &file_summaries),
1381 prompts::REPO_SYSTEM,
1382 )
1383 .unwrap_or(fallback);
1384 let summary = ground_text(&generated, &source_spans, &citation_list(&source_spans));
1385
1386 render_repo_doc(&summary, &top_modules, &root_files, &source_spans)
1387}
1388
1389fn render_repo_doc(
1390 summary: &str,
1391 modules: &[ModuleLink],
1392 files: &[FileLink],
1393 source_spans: &[SourceSpan],
1394) -> String {
1395 let mut doc = frontmatter("Repository Overview", "code_repo", source_spans);
1396 doc.push_str("# Repository Overview\n\n");
1397 write_section(&mut doc, "Overview", summary);
1398 if !modules.is_empty() {
1399 doc.push_str("## Modules\n\n");
1400 for module in modules {
1401 let _ = writeln!(
1402 doc,
1403 "- {} - {}",
1404 module_wikilink(&module.module),
1405 module.summary
1406 );
1407 }
1408 doc.push('\n');
1409 }
1410 if !files.is_empty() {
1411 doc.push_str("## Files\n\n");
1412 for file in files {
1413 let _ = writeln!(doc, "- {} - {}", file_wikilink(&file.path), file.summary);
1414 }
1415 doc.push('\n');
1416 }
1417 doc
1418}
1419
1420fn render_module_doc(module: &ModuleDoc) -> String {
1421 let mut doc = frontmatter(&module.module, "code_module", &module.source_spans);
1422 let _ = writeln!(doc, "# {}\n", module.module);
1423 match parent_module(&module.module) {
1424 Some(parent) => {
1425 let _ = writeln!(doc, "Parent: {}\n", module_wikilink(parent));
1426 }
1427 None => doc.push_str("Parent: [[repo|Repository Overview]]\n\n"),
1428 }
1429 write_section(&mut doc, "Overview", &module.summary);
1430 match module.graph_availability {
1431 CodewikiGraphAvailability::Unavailable => {
1432 doc.push_str("## Dependency Diagram\n\n`degraded: graph-unavailable`\n\n");
1433 }
1434 CodewikiGraphAvailability::Available => {
1435 if let Some(diagram) = &module.dependency_diagram {
1436 doc.push_str("## Dependency Diagram\n\n");
1437 doc.push_str(diagram);
1438 doc.push('\n');
1439 }
1440 if let Some(diagram) = &module.call_diagram {
1441 doc.push_str("## Call Diagram\n\n");
1442 doc.push_str(diagram);
1443 doc.push('\n');
1444 }
1445 }
1446 }
1447 if !module.child_modules.is_empty() {
1448 doc.push_str("## Child Modules\n\n");
1449 for child in &module.child_modules {
1450 let _ = writeln!(
1451 doc,
1452 "- {} - {}",
1453 module_wikilink(&child.module),
1454 child.summary
1455 );
1456 }
1457 doc.push('\n');
1458 }
1459 if !module.direct_files.is_empty() {
1460 doc.push_str("## Files\n\n");
1461 for file in &module.direct_files {
1462 let _ = writeln!(doc, "- {} - {}", file_wikilink(&file.path), file.summary);
1463 }
1464 doc.push('\n');
1465 }
1466 if !module.component_ids.is_empty() {
1467 doc.push_str("## Components\n\n");
1468 for component_id in &module.component_ids {
1469 let _ = writeln!(doc, "- {}", inline_code(component_id));
1470 }
1471 doc.push('\n');
1472 }
1473 doc
1474}
1475
1476fn render_file_doc(file: &FileDoc) -> String {
1477 let mut doc = frontmatter(&file.path, "code_file", &file.source_spans);
1478 let _ = writeln!(doc, "# {}\n", file.path);
1479 if file.module.is_empty() {
1480 doc.push_str("Module: [[repo|Repository Overview]]\n\n");
1481 } else {
1482 let _ = writeln!(doc, "Module: {}\n", module_wikilink(&file.module));
1483 }
1484 write_section(&mut doc, "Purpose", &file.summary);
1485 doc.push_str("## API Symbols\n\n");
1486 if file.symbols.is_empty() {
1487 doc.push_str("No indexed symbols.\n");
1488 return doc;
1489 }
1490 for symbol in &file.symbols {
1491 let _ = writeln!(
1492 doc,
1493 "- {} ({}) component {} ({}) lines {}-{} {}",
1494 inline_code(&symbol.symbol.qualified_name),
1495 symbol.symbol.kind,
1496 inline_code(&symbol.component_label),
1497 inline_code(&symbol.component_id),
1498 symbol.symbol.line_start,
1499 symbol.symbol.line_end,
1500 symbol.source_span.citation()
1501 );
1502 if let Some(signature) = symbol
1503 .symbol
1504 .signature
1505 .as_deref()
1506 .filter(|value| !value.is_empty())
1507 {
1508 let _ = writeln!(doc, " - Signature: {}", inline_code(signature));
1509 }
1510 let _ = writeln!(doc, " - Purpose: {}", symbol.purpose);
1511 }
1512 doc.push('\n');
1513 doc
1514}
1515
1516fn resolve_text_generator(
1517 ctx: &Context,
1518 ai: Option<AiRouting>,
1519) -> Option<Box<TextGenerator<'static>>> {
1520 let ai_context = resolve_ai_context(ctx, ai).ok()?;
1521 let route = effective_route(&ai_context, AiCapability::TextGenerate);
1522 if matches!(route, AiRouting::Off | AiRouting::Auto) {
1523 return None;
1524 }
1525
1526 let mut warned = false;
1527 let quiet = ctx.quiet;
1528 Some(Box::new(move |prompt, system| {
1529 let result = match route {
1530 AiRouting::Daemon => generate_via_daemon(&ai_context, prompt, Some(system)),
1531 AiRouting::Direct => generate_text(&ai_context, prompt, Some(system)),
1532 AiRouting::Off | AiRouting::Auto => return None,
1533 };
1534 match result {
1535 Ok(result) => clean_generated(result.text),
1536 Err(error) => {
1537 if !quiet && !warned {
1538 eprintln!("text generation unavailable; using AST-only codewiki docs: {error}");
1539 warned = true;
1540 }
1541 None
1542 }
1543 }
1544 }))
1545}
1546
1547fn resolve_ai_context(ctx: &Context, ai: Option<AiRouting>) -> anyhow::Result<AiContext> {
1548 let mut conn = db::connect_readonly(&ctx.database_url)?;
1549 let standalone = config::read_standalone_config_optional();
1550 let primary = PostgresAiConfigSource::new(&mut conn, secrets::resolve_config_value);
1551 let mut source = AiConfigSource::with_primary(primary, standalone);
1552 Ok(AiContext::resolve_with_options(
1553 Some(ctx.project_id.clone()),
1554 &mut source,
1555 AiContextOptions {
1556 no_ai: false,
1557 forced_routing: ai,
1558 },
1559 ))
1560}
1561
1562fn maybe_generate(
1563 generate: &mut Option<&mut TextGenerator<'_>>,
1564 prompt: &str,
1565 system: &str,
1566) -> Option<String> {
1567 generate
1568 .as_deref_mut()
1569 .and_then(|generate| generate(prompt, system))
1570 .and_then(clean_generated)
1571}
1572
/// Trims surrounding whitespace from generated text, mapping blank text to `None`.
fn clean_generated(text: String) -> Option<String> {
    match text.trim() {
        "" => None,
        trimmed => Some(trimmed.to_owned()),
    }
}
1577
1578fn structural_symbol_purpose(symbol: &Symbol) -> String {
1579 if let Some(summary) = symbol.summary.as_deref().filter(|value| !value.is_empty()) {
1580 return summary.to_string();
1581 }
1582 if let Some(docstring) = symbol
1583 .docstring
1584 .as_deref()
1585 .filter(|value| !value.is_empty())
1586 {
1587 return docstring.to_string();
1588 }
1589 format!(
1590 "Indexed {} `{}` in `{}`.",
1591 symbol.kind, symbol.qualified_name, symbol.file_path
1592 )
1593}
1594
1595fn structural_file_summary(file: &str, symbols: &[SymbolDoc]) -> String {
1596 if symbols.is_empty() {
1597 return format!("`{file}` has no indexed API symbols.");
1598 }
1599 format!(
1600 "`{file}` exposes {} indexed API symbol{}.",
1601 symbols.len(),
1602 plural(symbols.len())
1603 )
1604}
1605
1606fn structural_module_summary(
1607 module: &str,
1608 files: &[FileLink],
1609 child_modules: &[ModuleLink],
1610) -> String {
1611 let file_count = files.len();
1612 let child_count = child_modules.len();
1613 format!(
1614 "`{module}` contains {file_count} direct file{} and {child_count} child module{}.",
1615 plural(file_count),
1616 plural(child_count)
1617 )
1618}
1619
1620fn structural_repo_summary(file_count: usize, module_count: usize) -> String {
1621 format!(
1622 "Repository code documentation covers {file_count} file{} across {module_count} module{}.",
1623 plural(file_count),
1624 plural(module_count)
1625 )
1626}
1627
/// Appends a level-two markdown section to `doc`.
///
/// The heading is followed by a blank line, the trimmed `body`, and another blank line.
fn write_section(doc: &mut String, heading: &str, body: &str) {
    doc.push_str("## ");
    doc.push_str(heading);
    doc.push_str("\n\n");
    doc.push_str(body.trim());
    doc.push_str("\n\n");
}
1631
1632impl SourceSpan {
1633 fn from_symbol(symbol: &Symbol) -> Self {
1634 Self {
1635 file: symbol.file_path.clone(),
1636 line_start: symbol.line_start,
1637 line_end: symbol.line_end,
1638 }
1639 }
1640
1641 fn citation(&self) -> String {
1642 if self.line_start == self.line_end {
1643 format!("[{}:{}]", self.file, self.line_start)
1644 } else {
1645 format!("[{}:{}-{}]", self.file, self.line_start, self.line_end)
1646 }
1647 }
1648
1649 fn contains(&self, file: &str, line_start: usize, line_end: usize) -> bool {
1650 self.file == file && self.line_start <= line_start && line_end <= self.line_end
1651 }
1652}
1653
1654fn collect_link_spans(files: &[FileLink], modules: &[ModuleLink]) -> Vec<SourceSpan> {
1655 let mut spans = BTreeSet::new();
1656 for file in files {
1657 spans.extend(file.source_spans.iter().cloned());
1658 }
1659 for module in modules {
1660 spans.extend(module.source_spans.iter().cloned());
1661 }
1662 spans.into_iter().collect()
1663}
1664
1665fn citation_list(spans: &[SourceSpan]) -> String {
1666 spans
1667 .iter()
1668 .cloned()
1669 .collect::<BTreeSet<_>>()
1670 .into_iter()
1671 .map(|span| span.citation())
1672 .collect::<Vec<_>>()
1673 .join(" ")
1674}
1675
1676fn ground_text(text: &str, valid_spans: &[SourceSpan], fallback_citation: &str) -> String {
1677 let cleaned = strip_invalid_citations(text, valid_spans);
1678 if fallback_citation.is_empty() || contains_valid_citation(&cleaned, valid_spans) {
1679 cleaned
1680 } else {
1681 format!("{cleaned} {fallback_citation}")
1682 }
1683}
1684
1685fn strip_invalid_citations(text: &str, valid_spans: &[SourceSpan]) -> String {
1686 let mut out = String::new();
1687 let mut rest = text;
1688 while let Some(open) = rest.find('[') {
1689 let (before, after_open) = rest.split_at(open);
1690 out.push_str(before);
1691 let after_open = &after_open[1..];
1692 let Some(close) = after_open.find(']') else {
1693 out.push('[');
1694 out.push_str(after_open);
1695 return out;
1696 };
1697 let candidate = &after_open[..close];
1698 if citation_parts(candidate).is_none_or(|(file, start, end)| {
1699 valid_spans
1700 .iter()
1701 .any(|span| span.contains(file, start, end))
1702 }) {
1703 out.push('[');
1704 out.push_str(candidate);
1705 out.push(']');
1706 }
1707 rest = &after_open[close + 1..];
1708 }
1709 out.push_str(rest);
1710 out
1711}
1712
1713fn contains_valid_citation(text: &str, valid_spans: &[SourceSpan]) -> bool {
1714 let mut rest = text;
1715 while let Some(open) = rest.find('[') {
1716 let after_open = &rest[open + 1..];
1717 let Some(close) = after_open.find(']') else {
1718 return false;
1719 };
1720 if let Some((file, start, end)) = citation_parts(&after_open[..close])
1721 && valid_spans
1722 .iter()
1723 .any(|span| span.contains(file, start, end))
1724 {
1725 return true;
1726 }
1727 rest = &after_open[close + 1..];
1728 }
1729 false
1730}
1731
/// Parses the inside of a bracketed citation, `file:line` or `file:start-end`.
///
/// The file part is everything before the last `:` and must be non-empty and free of
/// whitespace. Line numbers must parse as `usize`, start at 1 or above, and not run
/// backwards. Returns `None` for anything else.
fn citation_parts(value: &str) -> Option<(&str, usize, usize)> {
    let (file, range) = value.rsplit_once(':')?;
    if file.is_empty() || file.contains(char::is_whitespace) {
        return None;
    }
    // A single line number stands for a one-line range.
    let (first, last) = range.split_once('-').unwrap_or((range, range));
    let line_start: usize = first.parse().ok()?;
    let line_end: usize = last.parse().ok()?;
    if line_start == 0 || line_start > line_end {
        None
    } else {
        Some((file, line_start, line_end))
    }
}
1746
1747fn frontmatter(title: &str, kind: &str, source_spans: &[SourceSpan]) -> String {
1748 let mut out = format!("---\ntitle: \"{}\"\ntype: {kind}\n", yaml_quote(title));
1749 let mut files: BTreeMap<&str, BTreeSet<(usize, usize)>> = BTreeMap::new();
1750 for span in source_spans {
1751 files
1752 .entry(&span.file)
1753 .or_default()
1754 .insert((span.line_start, span.line_end));
1755 }
1756 if files.is_empty() {
1757 out.push_str("source_files: []\n");
1758 out.push_str("---\n\n");
1759 return out;
1760 }
1761 out.push_str("source_files:\n");
1762 for (file, ranges) in files {
1763 let _ = writeln!(out, " - file: \"{}\"", yaml_quote(file));
1764 out.push_str(" ranges:\n");
1765 for (line_start, line_end) in ranges {
1766 if line_start == line_end {
1767 let _ = writeln!(out, " - \"{line_start}\"");
1768 } else {
1769 let _ = writeln!(out, " - \"{line_start}-{line_end}\"");
1770 }
1771 }
1772 }
1773 out.push_str("---\n\n");
1774 out
1775}
1776
/// Escapes a value for use inside a double-quoted YAML scalar.
///
/// Backslashes and double quotes are each prefixed with a backslash; nothing else is
/// changed.
fn yaml_quote(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch == '\\' || ch == '"' {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted
}
1780
1781fn inline_code(value: &str) -> String {
1782 let value = value.replace('\n', " ");
1783 let delimiter = "`".repeat(max_backtick_run(&value).saturating_add(1));
1784 if value.starts_with('`') || value.ends_with('`') {
1785 format!("{delimiter} {value} {delimiter}")
1786 } else {
1787 format!("{delimiter}{value}{delimiter}")
1788 }
1789}
1790
/// Returns the length of the longest run of consecutive backticks in `value`.
fn max_backtick_run(value: &str) -> usize {
    // Splitting on every other character leaves only the backtick runs (possibly empty).
    // A backtick is one byte, so a run's byte length equals its character count.
    value
        .split(|ch: char| ch != '`')
        .map(str::len)
        .max()
        .unwrap_or(0)
}
1804
/// Returns the plural suffix for `count`: empty for exactly one, `s` otherwise.
fn plural(count: usize) -> &'static str {
    match count {
        1 => "",
        _ => "s",
    }
}
1808
1809fn component_id(symbol: &Symbol) -> String {
1810 symbol.id.clone()
1811}
1812
1813fn component_label(symbol: &Symbol) -> String {
1814 let name = if symbol.qualified_name.is_empty() {
1815 &symbol.name
1816 } else {
1817 &symbol.qualified_name
1818 };
1819 format!("{name} [{}]", symbol.kind)
1820}
1821
/// Decides whether `file` counts as core source rather than test, generated, vendored,
/// or build-output material.
///
/// Matching is ASCII case-insensitive. A file is rejected when its path contains one of
/// the marker infixes, ends with one of the marker suffixes, or has a path component
/// whose name is in the excluded list.
fn is_core_file(file: &str) -> bool {
    const MARKER_INFIXES: [&str; 3] = [".generated.", ".test.", ".spec."];
    const MARKER_SUFFIXES: [&str; 3] = [".gen.rs", "_test.rs", "_tests.rs"];
    const EXCLUDED_COMPONENTS: [&str; 16] = [
        "test",
        "tests",
        "__tests__",
        "spec",
        "specs",
        "fixture",
        "fixtures",
        "vendor",
        "vendored",
        "third_party",
        "generated",
        "gen",
        "dist",
        "build",
        "target",
        "node_modules",
    ];

    let lower = file.to_ascii_lowercase();
    let marked_by_name = MARKER_INFIXES.iter().any(|infix| lower.contains(infix))
        || MARKER_SUFFIXES.iter().any(|suffix| lower.ends_with(suffix));
    if marked_by_name {
        return false;
    }

    Path::new(file).components().all(|component| {
        let part = component.as_os_str().to_string_lossy().to_ascii_lowercase();
        !EXCLUDED_COMPONENTS.contains(&part.as_str())
    })
}
1857
/// Reports whether `file` falls under any of the requested `scopes`.
///
/// Everything is in scope when there are no scopes or when any scope is empty.
/// Otherwise a file matches a scope when it equals the scope, or when it sits below the
/// scope directory (trailing slashes on the scope are ignored).
fn in_scope(file: &str, scopes: &[String]) -> bool {
    if scopes.is_empty() || scopes.iter().any(String::is_empty) {
        return true;
    }
    scopes.iter().any(|scope| {
        let below_scope = file
            .strip_prefix(scope.trim_end_matches('/'))
            .is_some_and(|rest| rest.starts_with('/'));
        file == scope || below_scope
    })
}
1865
/// Derives the module of a file from its parent directory.
///
/// Backslashes are normalised to `/`. A file with no parent directory, or whose parent
/// is `.`, belongs to the root module, represented by the empty string.
fn module_for_file(file: &str) -> String {
    let Some(parent) = Path::new(file).parent() else {
        return String::new();
    };
    let module = parent.to_string_lossy().replace('\\', "/");
    if module == "." {
        String::new()
    } else {
        module
    }
}
1873
1874fn module_ancestors(module: &str) -> Vec<String> {
1875 let mut out = Vec::new();
1876 let mut current = module;
1877 while !current.is_empty() {
1878 out.push(current.to_string());
1879 current = parent_module(current).unwrap_or("");
1880 }
1881 out
1882}
1883
/// Returns the part of `module` before its last `/`, or `None` when there is no `/`.
fn parent_module(module: &str) -> Option<&str> {
    let (parent, _) = module.rsplit_once('/')?;
    Some(parent)
}
1887
/// Reports whether `child` is nested somewhere below `module`.
///
/// The empty (root) module is never treated as an ancestor, and a module is not its own
/// ancestor.
fn module_is_ancestor(module: &str, child: &str) -> bool {
    if module.is_empty() {
        return false;
    }
    child
        .strip_prefix(module)
        .is_some_and(|rest| rest.starts_with('/'))
}
1891
1892fn direct_child_modules<'a>(
1893 module: &str,
1894 candidates: impl Iterator<Item = &'a String>,
1895) -> Vec<String> {
1896 candidates
1897 .filter(|candidate| parent_module(candidate).is_some_and(|parent| parent == module))
1898 .cloned()
1899 .collect()
1900}
1901
/// Counts the `/`-separated segments of `module`; the empty string counts as one.
fn module_depth(module: &str) -> usize {
    module.matches('/').count() + 1
}
1905
/// Returns the output path of a file page, relative to the codewiki root.
fn file_doc_path(file: &str) -> String {
    ["files/", file, ".md"].concat()
}
1909
/// Returns the output path of a module page, relative to the codewiki root.
fn module_doc_path(module: &str) -> String {
    ["modules/", module, ".md"].concat()
}
1913
/// Builds a wikilink to a file page, using the file path as the link text.
fn file_wikilink(file: &str) -> String {
    ["[[files/", file, "|", file, "]]"].concat()
}
1917
/// Builds a wikilink to a module page, using the module name as the link text.
fn module_wikilink(module: &str) -> String {
    ["[[modules/", module, "|", module, "]]"].concat()
}
1921
1922fn safe_doc_path(out_dir: &Path, relative_path: &str) -> anyhow::Result<PathBuf> {
1923 let path = Path::new(relative_path);
1924 if path.is_absolute()
1925 || path
1926 .components()
1927 .any(|component| matches!(component, std::path::Component::ParentDir))
1928 {
1929 anyhow::bail!("refusing to write unsafe codewiki path: {relative_path}");
1930 }
1931 Ok(out_dir.join(path))
1932}
1933
1934#[cfg(test)]
1935mod tests {
1936 use super::*;
1937
// End-to-end check: repo, module, and file pages are written and link to each other.
#[test]
fn generates_hierarchical_docs() {
    let symbols = vec![
        test_symbol("src/lib.rs", "Client", "class", 1, "pub struct Client {"),
        test_symbol("src/lib.rs", "connect", "function", 5, "pub fn connect()"),
        test_symbol(
            "src/nested/api.rs",
            "serve",
            "function",
            3,
            "pub fn serve()",
        ),
    ];
    let input = CodewikiInput {
        files: vec!["src/lib.rs".to_string(), "src/nested/api.rs".to_string()],
        graph_edges: Vec::new(),
        graph_availability: CodewikiGraphAvailability::Available,
        symbols,
    };

    let out_dir = tempfile::tempdir().expect("tempdir");
    let docs = generate_hierarchical_docs(&input, None);
    write_doc_set(out_dir.path(), &docs).expect("writes docs");

    let read_doc = |relative: &str, what: &str| {
        std::fs::read_to_string(out_dir.path().join(relative)).expect(what)
    };
    let repo = read_doc("repo.md", "repo doc");
    let module = read_doc("modules/src.md", "src module doc");
    let file = read_doc("files/src/lib.rs.md", "file doc");

    for expected in ["[[modules/src|src]]", "Repository Overview"] {
        assert!(repo.contains(expected));
    }
    assert!(module.contains("[[files/src/lib.rs|src/lib.rs]]"));
    for expected in ["API Symbols", "pub struct Client {", "[[modules/src|src]]"] {
        assert!(file.contains(expected));
    }
}
1974
// The fence must outgrow any backtick run in the value, and edge backticks need padding.
#[test]
fn inline_code_uses_commonmark_backtick_delimiters() {
    let cases = [
        ("plain", "`plain`"),
        ("a`b", "``a`b``"),
        ("a``b", "```a``b```"),
        ("`edge`", "`` `edge` ``"),
        ("two\nlines", "`two lines`"),
    ];
    for (value, expected) in cases {
        assert_eq!(inline_code(value), expected);
    }
}
1983
// Files joined by a call edge are documented under their common module, while test and
// vendored files get no page at all.
#[test]
fn clusters_modules_from_graph() {
    let handler_id = test_component_id("src/api/handler.rs", "handle", "function");
    let service_id = test_component_id("src/domain/service.rs", "Service", "class");

    let files = [
        "src/api/handler.rs",
        "src/domain/service.rs",
        "tests/domain/service_test.rs",
        "vendor/generated/client.rs",
    ]
    .iter()
    .map(|path| path.to_string())
    .collect();
    let symbols = vec![
        test_symbol(
            "src/api/handler.rs",
            "handle",
            "function",
            1,
            "pub fn handle()",
        ),
        test_symbol(
            "src/domain/service.rs",
            "Service",
            "class",
            1,
            "pub struct Service;",
        ),
        test_symbol_with_qualified(
            "src/domain/service.rs",
            "new",
            "Service::new",
            "function",
            3,
            "pub fn new() -> Self",
        ),
        test_symbol(
            "tests/domain/service_test.rs",
            "service_test",
            "function",
            1,
            "fn service_test()",
        ),
        test_symbol(
            "vendor/generated/client.rs",
            "GeneratedClient",
            "class",
            1,
            "pub struct GeneratedClient;",
        ),
    ];
    let input = CodewikiInput {
        files,
        graph_edges: vec![CodewikiGraphEdge::call(
            handler_id.clone(),
            service_id.clone(),
        )],
        graph_availability: CodewikiGraphAvailability::Available,
        symbols,
    };

    let docs_by_path = generate_hierarchical_docs(&input, None)
        .into_iter()
        .collect::<BTreeMap<_, _>>();

    let module = docs_by_path
        .get("modules/src.md")
        .expect("graph-connected files cluster under common module");
    assert!(module.contains("[[files/src/api/handler.rs|src/api/handler.rs]]"));
    assert!(module.contains("[[files/src/domain/service.rs|src/domain/service.rs]]"));
    assert!(module.contains(&handler_id));
    assert!(module.contains(&service_id));

    for excluded in [
        "files/tests/domain/service_test.rs.md",
        "files/vendor/generated/client.rs.md",
    ] {
        assert!(!docs_by_path.contains_key(excluded));
    }
}
2059
// A two-file parent cycle must terminate and leave both files pointing at one root.
#[test]
fn file_root_detection_breaks_parent_cycles() {
    let mut parents: HashMap<String, String> = [("b.rs", "a.rs"), ("a.rs", "b.rs")]
        .into_iter()
        .map(|(child, parent)| (child.to_string(), parent.to_string()))
        .collect();

    let root = find_file_root(&mut parents, "a.rs");

    assert_eq!(root, "a.rs");
    for file in ["a.rs", "b.rs"] {
        assert_eq!(parents.get(file).map(String::as_str), Some("a.rs"));
    }
}
2073
// Without graph data, modules follow the directory layout and file pages still list
// their components by id.
#[test]
fn clusters_without_falkordb() {
    let files = [
        "src/api/handler.rs",
        "src/domain/service.rs",
        "tests/domain/service_test.rs",
    ]
    .iter()
    .map(|path| path.to_string())
    .collect();
    let symbols = vec![
        test_symbol(
            "src/api/handler.rs",
            "handle",
            "function",
            1,
            "pub fn handle()",
        ),
        test_symbol(
            "src/domain/service.rs",
            "Service",
            "class",
            1,
            "pub struct Service;",
        ),
        test_symbol_with_qualified(
            "src/domain/service.rs",
            "new",
            "Service::new",
            "function",
            3,
            "pub fn new() -> Self",
        ),
        test_symbol(
            "tests/domain/service_test.rs",
            "service_test",
            "function",
            1,
            "fn service_test()",
        ),
    ];
    let input = CodewikiInput {
        files,
        graph_edges: Vec::new(),
        graph_availability: CodewikiGraphAvailability::Unavailable,
        symbols,
    };

    let docs_by_path = generate_hierarchical_docs(&input, None)
        .into_iter()
        .collect::<BTreeMap<_, _>>();

    assert!(docs_by_path.contains_key("modules/src/api.md"));
    assert!(docs_by_path.contains_key("modules/src/domain.md"));
    assert!(!docs_by_path.contains_key("files/tests/domain/service_test.rs.md"));

    let handler_doc = docs_by_path
        .get("files/src/api/handler.rs.md")
        .expect("handler file doc");
    assert!(handler_doc.contains(&test_component_id(
        "src/api/handler.rs",
        "handle",
        "function"
    )));

    let service_doc = docs_by_path
        .get("files/src/domain/service.rs.md")
        .expect("service file doc");
    assert!(service_doc.contains(&test_component_id(
        "src/domain/service.rs",
        "Service",
        "class"
    )));
    assert!(service_doc.contains(&test_component_id(
        "src/domain/service.rs",
        "new",
        "function"
    )));
    assert!(!service_doc.contains("src/domain/service.rs::Service::new"));
}
2160
// The api module's dependency diagram shows edges within the hop limit and leaves out
// the edge from the unrelated module.
#[test]
fn emits_bounded_mermaid() {
    let handle = || test_component_id("src/api/handler.rs", "handle", "function");
    let service = || test_component_id("src/domain/service.rs", "Service", "class");
    let repo = || test_component_id("src/storage/repo.rs", "Repo", "class");
    let tool = || test_component_id("src/unrelated/tool.rs", "Tool", "class");

    let files = [
        "src/api/handler.rs",
        "src/domain/service.rs",
        "src/storage/repo.rs",
        "src/unrelated/tool.rs",
    ]
    .iter()
    .map(|path| path.to_string())
    .collect();
    let symbols = vec![
        test_symbol(
            "src/api/handler.rs",
            "handle",
            "function",
            1,
            "pub fn handle()",
        ),
        test_symbol(
            "src/domain/service.rs",
            "Service",
            "class",
            1,
            "pub struct Service;",
        ),
        test_symbol(
            "src/storage/repo.rs",
            "Repo",
            "class",
            1,
            "pub struct Repo;",
        ),
        test_symbol(
            "src/unrelated/tool.rs",
            "Tool",
            "class",
            1,
            "pub struct Tool;",
        ),
    ];
    let input = CodewikiInput {
        files,
        graph_edges: vec![
            CodewikiGraphEdge::import(handle(), service()),
            CodewikiGraphEdge::import(service(), repo()),
            CodewikiGraphEdge::import(tool(), repo()),
        ],
        graph_availability: CodewikiGraphAvailability::Available,
        symbols,
    };

    let docs_by_path = generate_hierarchical_docs(&input, None)
        .into_iter()
        .collect::<BTreeMap<_, _>>();
    let rendered = docs_by_path
        .get("modules/src/api.md")
        .expect("api module doc");

    let present = [
        "```mermaid",
        "graph LR",
        "m_src_api[\"src/api\"] --> m_src_domain[\"src/domain\"]",
        "m_src_domain[\"src/domain\"] --> m_src_storage[\"src/storage\"]",
    ];
    for expected in present {
        assert!(rendered.contains(expected));
    }
    assert!(
        !rendered.contains("m_src_unrelated[\"src/unrelated\"] --> m_src_storage[\"src/storage\"]")
    );
}
2234
/// Docs must still render when the graph is reported as unavailable.
///
/// The module doc is expected to carry the `degraded: graph-unavailable`
/// marker, while the file doc keeps its "API Symbols" content and the
/// component id of the single indexed symbol.
#[test]
fn mermaid_degrades_without_falkordb() {
    let handler_path = "src/api/handler.rs";
    let input = CodewikiInput {
        files: vec![handler_path.to_string()],
        graph_edges: Vec::new(),
        graph_availability: CodewikiGraphAvailability::Unavailable,
        symbols: vec![test_symbol(
            handler_path,
            "handle",
            "function",
            1,
            "pub fn handle()",
        )],
    };

    let docs_by_path: BTreeMap<_, _> = generate_hierarchical_docs(&input, None)
        .into_iter()
        .collect();
    let module_doc = docs_by_path
        .get("modules/src/api.md")
        .expect("module doc still renders");
    let file_doc = docs_by_path
        .get("files/src/api/handler.rs.md")
        .expect("file doc still renders");

    assert!(module_doc.contains("degraded: graph-unavailable"));
    assert!(file_doc.contains("API Symbols"));

    // The file doc must reference the symbol by its component id.
    let handle_id = test_component_id(handler_path, "handle", "function");
    assert!(file_doc.contains(handle_id.as_str()));
}
2267
/// An available graph with zero edges is not a degraded graph: the module doc
/// must not contain the `degraded: graph-unavailable` marker.
#[test]
fn empty_available_graph_does_not_emit_degradation_marker() {
    let handler_path = "src/api/handler.rs";
    let handle = test_symbol(handler_path, "handle", "function", 1, "pub fn handle()");
    let input = CodewikiInput {
        files: vec![handler_path.to_string()],
        graph_edges: Vec::new(),
        graph_availability: CodewikiGraphAvailability::Available,
        symbols: vec![handle],
    };

    let docs_by_path: BTreeMap<_, _> = generate_hierarchical_docs(&input, None)
        .into_iter()
        .collect();
    let module_doc = docs_by_path
        .get("modules/src/api.md")
        .expect("module doc still renders");

    let has_marker = module_doc.contains("degraded: graph-unavailable");
    assert!(!has_marker);
}
2291
/// Runs doc generation with a stub generator whose replies embed bracketed
/// citations, then checks the rendered file doc for `src/lib.rs`.
///
/// The stub cites line 999 for `Client`, line 20 for `connect`, and
/// `missing.rs:1` for every other prompt. The file doc is expected to list the
/// symbol ranges under `source_files`, to contain `[src/lib.rs:10-14]` and
/// `[src/lib.rs:20]`, and to contain neither the line-999 nor the `missing.rs`
/// citation.
#[test]
fn citations_validated_against_spans() {
    let input = CodewikiInput {
        files: vec!["src/lib.rs".to_string()],
        graph_edges: Vec::new(),
        graph_availability: CodewikiGraphAvailability::Available,
        symbols: vec![
            test_symbol_range(
                "src/lib.rs",
                "Client",
                "class",
                10,
                14,
                "pub struct Client {",
            ),
            test_symbol_range(
                "src/lib.rs",
                "connect",
                "function",
                20,
                24,
                "pub fn connect()",
            ),
        ],
    };

    // Stub generator: the reply is chosen purely from the prompt text.
    let mut generator = |prompt: &str, _system: &str| {
        let reply = match prompt {
            text if text.contains("Client") => "Builds client state [src/lib.rs:999].",
            text if text.contains("connect") => "Opens a connection [src/lib.rs:20].",
            _ => "Coordinates the public API [missing.rs:1].",
        };
        Some(reply.to_string())
    };

    let docs = generate_hierarchical_docs(&input, Some(&mut generator));
    let file_doc = docs
        .iter()
        .filter(|(path, _)| path == "files/src/lib.rs.md")
        .map(|(_, content)| content)
        .next()
        .expect("file doc");

    let required = [
        "source_files:\n",
        " - file: \"src/lib.rs\"\n",
        " ranges:\n",
        " - \"10-14\"\n",
        " - \"20-24\"\n",
        "[src/lib.rs:10-14]",
        "[src/lib.rs:20]",
    ];
    for fragment in required {
        assert!(
            file_doc.contains(fragment),
            "file doc should contain {:?}",
            fragment
        );
    }

    let forbidden = ["src/lib.rs:999", "missing.rs:1"];
    for fragment in forbidden {
        assert!(
            !file_doc.contains(fragment),
            "file doc should not contain {:?}",
            fragment
        );
    }
}
2344
/// Drives `write_incremental_doc_set` through three runs on a temp project.
///
/// 1. First run: the repo, module and both file docs are reported as written.
/// 2. After `src/lib.rs` changes on disk: only `repo.md`, `modules/src.md` and
///    `files/src/lib.rs.md` are reported, a hand-added marker in the other
///    file doc survives, and the meta log's `generated_docs` lists the same
///    three paths.
/// 3. After `src/nested/api.rs` is dropped from the input: its doc file is
///    gone and the meta log's `docs` map has no entry for it.
#[test]
fn incremental_regenerates_only_changed() {
    let project = tempfile::tempdir().expect("project tempdir");
    let root = project.path();
    std::fs::create_dir_all(root.join("src/nested")).expect("source dirs");
    let lib_source = root.join("src/lib.rs");
    std::fs::write(&lib_source, "pub struct Client;\n").expect("write lib");
    std::fs::write(root.join("src/nested/api.rs"), "pub fn serve() {}\n").expect("write api");
    let out_dir = root.join("codewiki");
    let meta_path = out_dir.join("_meta/codewiki.json");

    let input = CodewikiInput {
        files: vec!["src/lib.rs".to_string(), "src/nested/api.rs".to_string()],
        graph_edges: Vec::new(),
        graph_availability: CodewikiGraphAvailability::Available,
        symbols: vec![
            test_symbol("src/lib.rs", "Client", "class", 1, "pub struct Client;"),
            test_symbol("src/nested/api.rs", "serve", "function", 1, "pub fn serve()"),
        ],
    };

    // Run 1: nothing exists yet, so every doc should be reported as written.
    let first_docs = generate_hierarchical_docs(&input, None);
    let first_written =
        write_incremental_doc_set(root, &out_dir, &first_docs).expect("first write");
    for expected in [
        "repo.md",
        "modules/src.md",
        "files/src/lib.rs.md",
        "files/src/nested/api.rs.md",
    ] {
        assert!(
            first_written.iter().any(|written| written == expected),
            "first run should report {}",
            expected
        );
    }

    // Tag the doc of the file that will NOT change; the tag only survives if
    // the next run leaves this doc untouched.
    let preserved_doc = out_dir.join("files/src/nested/api.rs.md");
    let mut tagged = std::fs::read_to_string(&preserved_doc).expect("unchanged doc content");
    tagged += "\n<!-- preserve unchanged doc -->\n";
    std::fs::write(&preserved_doc, tagged).expect("write unchanged marker");

    // Run 2: same input, but the lib source differs on disk.
    std::fs::write(&lib_source, "pub struct Client;\npub fn connect() {}\n")
        .expect("modify lib");
    let changed_docs = generate_hierarchical_docs(&input, None);
    let changed_written =
        write_incremental_doc_set(root, &out_dir, &changed_docs).expect("incremental write");
    let preserved_after =
        std::fs::read_to_string(&preserved_doc).expect("unchanged doc after content");

    assert!(preserved_after.contains("preserve unchanged doc"));
    let expected_changed: Vec<String> = ["repo.md", "modules/src.md", "files/src/lib.rs.md"]
        .iter()
        .map(|path| path.to_string())
        .collect();
    assert_eq!(changed_written, expected_changed);

    let meta_raw = std::fs::read_to_string(&meta_path).expect("read meta log");
    let meta_json: serde_json::Value = serde_json::from_str(&meta_raw).expect("parse meta log");
    let generated_docs = meta_json["generated_docs"]
        .as_array()
        .expect("generated docs");
    let expected_generated: Vec<serde_json::Value> = expected_changed
        .iter()
        .cloned()
        .map(serde_json::Value::String)
        .collect();
    assert_eq!(generated_docs, &expected_generated);

    // Run 3: the nested file is no longer part of the input.
    let reduced_input = CodewikiInput {
        files: vec!["src/lib.rs".to_string()],
        graph_edges: Vec::new(),
        graph_availability: CodewikiGraphAvailability::Available,
        symbols: vec![test_symbol(
            "src/lib.rs",
            "Client",
            "class",
            1,
            "pub struct Client;",
        )],
    };
    let reduced_docs = generate_hierarchical_docs(&reduced_input, None);
    write_incremental_doc_set(root, &out_dir, &reduced_docs).expect("stale docs removed");

    assert!(!preserved_doc.exists());
    let final_raw = std::fs::read_to_string(&meta_path).expect("read final meta");
    let final_json: serde_json::Value =
        serde_json::from_str(&final_raw).expect("parse final meta");
    let stale_entry = final_json["docs"].get("files/src/nested/api.rs.md");
    assert!(stale_entry.is_none());
}
2443
/// Serializes a `CodewikiRunSummary` to JSON and checks the keys this test
/// treats as the contract: `command`, `project_id`, `project_root`,
/// `changed_paths`, `skipped` and `ai_enabled`.
#[test]
fn run_summary_serializes_daemon_contract_keys() {
    let value = serde_json::to_value(CodewikiRunSummary {
        command: "codewiki",
        project_id: "project-1".to_string(),
        project_root: "/repo".to_string(),
        out_dir: "/repo/codewiki".to_string(),
        generated_pages: 3,
        changed_paths: vec!["repo.md".to_string()],
        skipped: 2,
        files: 1,
        modules: 1,
        symbols: 4,
        ai_enabled: false,
    })
    .expect("summary json");

    // String-valued keys, checked in a fixed order.
    let string_keys = [
        ("command", "codewiki"),
        ("project_id", "project-1"),
        ("project_root", "/repo"),
    ];
    for (key, expected) in string_keys {
        assert_eq!(value[key], expected, "unexpected value for key {:?}", key);
    }

    assert_eq!(value["changed_paths"][0], "repo.md");
    assert_eq!(value["skipped"], 2);
    assert_eq!(value["ai_enabled"], false);
}
2469
/// `component_id` must return the id stored on the symbol, even when that id
/// was overwritten after the fixture was built.
#[test]
fn component_id_uses_stored_symbol_id() {
    let stored_id = "stored-symbol-id";
    let mut symbol = test_symbol("src/lib.rs", "Client", "class", 1, "pub struct Client;");
    symbol.id = stored_id.to_string();

    let resolved = component_id(&symbol);
    assert_eq!(resolved, stored_id);
}
2476
/// `write_doc` must refuse a relative path whose parent directory inside the
/// output dir is a symlink, and must not create the file at the link target.
#[test]
#[cfg(unix)]
fn write_doc_rejects_symlinked_parent() {
    let project = tempfile::tempdir().expect("project tempdir");
    let out_dir = project.path().join("codewiki");
    let outside = tempfile::tempdir().expect("outside tempdir");
    std::fs::create_dir_all(&out_dir).expect("out dir");

    // `<out_dir>/linked` points at a directory outside the output tree.
    let link = out_dir.join("linked");
    std::os::unix::fs::symlink(outside.path(), link).expect("symlink parent");

    let outcome = write_doc(&out_dir, "linked/escape.md", "escaped");
    let message = outcome
        .expect_err("symlink parent should be rejected")
        .to_string();

    assert!(message.contains("symlinked codewiki path"));
    let escaped_file = outside.path().join("escape.md");
    assert!(!escaped_file.exists());
}
2494
/// `write_doc` must refuse to write when the destination file itself is a
/// symlink, and must leave the symlink's target uncreated.
#[test]
#[cfg(unix)]
fn write_doc_rejects_symlinked_target() {
    let project = tempfile::tempdir().expect("project tempdir");
    let out_dir = project.path().join("codewiki");
    let outside = tempfile::tempdir().expect("outside tempdir");
    std::fs::create_dir_all(&out_dir).expect("out dir");

    // `<out_dir>/target.md` is a link to a file that does not exist yet.
    let outside_target = outside.path().join("target.md");
    let link = out_dir.join("target.md");
    std::os::unix::fs::symlink(&outside_target, link).expect("symlink target");

    let outcome = write_doc(&out_dir, "target.md", "escaped");
    let message = outcome.expect_err("symlink target rejected").to_string();

    assert!(message.contains("symlinked codewiki path"));
    assert!(!outside_target.exists());
}
2512
2513 fn test_symbol(
2514 file_path: &str,
2515 name: &str,
2516 kind: &str,
2517 line_start: usize,
2518 signature: &str,
2519 ) -> Symbol {
2520 test_symbol_with_qualified(file_path, name, name, kind, line_start, signature)
2521 }
2522
2523 fn test_component_id(file_path: &str, name: &str, kind: &str) -> String {
2524 Symbol::make_id("project-1", file_path, name, kind, 0)
2525 }
2526
2527 fn test_symbol_with_qualified(
2528 file_path: &str,
2529 name: &str,
2530 qualified_name: &str,
2531 kind: &str,
2532 line_start: usize,
2533 signature: &str,
2534 ) -> Symbol {
2535 Symbol {
2536 id: Symbol::make_id("project-1", file_path, name, kind, 0),
2537 project_id: "project-1".to_string(),
2538 file_path: file_path.to_string(),
2539 name: name.to_string(),
2540 qualified_name: qualified_name.to_string(),
2541 kind: kind.to_string(),
2542 language: "rust".to_string(),
2543 byte_start: 0,
2544 byte_end: 0,
2545 line_start,
2546 line_end: line_start,
2547 signature: Some(signature.to_string()),
2548 docstring: None,
2549 parent_symbol_id: None,
2550 content_hash: String::new(),
2551 summary: None,
2552 created_at: String::new(),
2553 updated_at: String::new(),
2554 }
2555 }
2556
2557 fn test_symbol_range(
2558 file_path: &str,
2559 name: &str,
2560 kind: &str,
2561 line_start: usize,
2562 line_end: usize,
2563 signature: &str,
2564 ) -> Symbol {
2565 Symbol {
2566 id: Symbol::make_id("project-1", file_path, name, kind, 0),
2567 project_id: "project-1".to_string(),
2568 file_path: file_path.to_string(),
2569 name: name.to_string(),
2570 qualified_name: name.to_string(),
2571 kind: kind.to_string(),
2572 language: "rust".to_string(),
2573 byte_start: 0,
2574 byte_end: 0,
2575 line_start,
2576 line_end,
2577 signature: Some(signature.to_string()),
2578 docstring: None,
2579 parent_symbol_id: None,
2580 content_hash: String::new(),
2581 summary: None,
2582 created_at: String::new(),
2583 updated_at: String::new(),
2584 }
2585 }
2586}