1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
2use std::path::{Path, PathBuf};
3
4use gobby_core::ai::{daemon::generate_via_daemon, effective_route, text::generate_text};
5use gobby_core::ai_context::{AiConfigSource, AiContext, AiContextOptions, PostgresAiConfigSource};
6use gobby_core::config::{AiCapability, AiRouting};
7use serde::{Deserialize, Serialize};
8
9use crate::commands::scope;
10use crate::config::{self, Context};
11use crate::db;
12use crate::graph::typed_query;
13use crate::index::hasher;
14use crate::models::Symbol;
15use crate::output::{self, Format};
16use crate::secrets;
17use crate::visibility;
18
/// Output directory used by `run` when the caller does not pass one.
const DEFAULT_OUT_DIR: &str = "codewiki";
/// Path of the codewiki metadata file.
// NOTE(review): presumably relative to the output directory and read/written by the `io`
// module — not visible from this file; confirm.
const CODEWIKI_META_PATH: &str = "_meta/codewiki.json";
/// Hop limit for Mermaid diagrams.
// NOTE(review): consumed outside this file (presumably by the `render` module) — confirm.
const MAX_MERMAID_HOPS: usize = 2;
/// Edge limit for Mermaid diagrams.
// NOTE(review): consumed outside this file (presumably by the `render` module) — confirm.
const MAX_MERMAID_EDGES: usize = 20;
/// Largest `edge_limit` accepted by `validate_edge_limit`.
const MAX_EDGE_LIMIT: usize = 100_000;
24
25mod build;
26mod cluster;
27mod graph;
28mod io;
29mod paths;
30mod prompts;
31mod render;
32mod text;
33
34pub(crate) use build::{build_file_doc, build_module_docs};
36pub(crate) use cluster::{
38 cluster_file_modules, files_for_import_target, first_component_for_file,
39 symbols_by_file_component,
40};
41#[cfg(test)]
42pub(crate) use cluster::{common_module_for_files, find_file_root};
43pub(crate) use graph::fetch_codewiki_graph_edges;
45#[cfg(test)]
46pub(crate) use graph::{codewiki_call_edges_query, codewiki_import_edges_query};
47pub(crate) use paths::{
49 component_label, direct_child_modules, file_doc_path, file_wikilink, in_scope, inline_code,
50 is_core_file, module_ancestors, module_depth, module_doc_path, module_for_file,
51 module_is_ancestor, module_wikilink, parent_module, plural,
52};
53pub(crate) use render::{
55 build_repo_doc, render_file_doc, render_module_call_mermaid, render_module_dependency_mermaid,
56 render_module_doc,
57};
58pub(crate) use text::{
60 citation_list, collect_link_spans, frontmatter, ground_text, maybe_generate,
61 resolve_text_generator, structural_file_summary, structural_module_summary,
62 structural_repo_summary, structural_symbol_purpose, write_section,
63};
64
65pub use io::{write_doc_set, write_incremental_doc_set};
66
67#[derive(Debug, Clone)]
68pub struct CodewikiInput {
69 pub files: Vec<String>,
70 pub graph_edges: Vec<CodewikiGraphEdge>,
71 pub graph_availability: CodewikiGraphAvailability,
72 pub symbols: Vec<Symbol>,
73}
74
/// An edge from a source component to a target component, tagged with its kind.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodewikiGraphEdge {
    /// Id of the component the edge starts from.
    pub source_component_id: String,
    /// Id of the component the edge points to.
    pub target_component_id: String,
    /// Whether the edge is a call or an import.
    pub kind: CodewikiGraphEdgeKind,
}

impl CodewikiGraphEdge {
    /// Builds an edge of kind [`CodewikiGraphEdgeKind::Call`].
    pub fn call(
        source_component_id: impl Into<String>,
        target_component_id: impl Into<String>,
    ) -> Self {
        Self::with_kind(
            source_component_id,
            target_component_id,
            CodewikiGraphEdgeKind::Call,
        )
    }

    /// Builds an edge of kind [`CodewikiGraphEdgeKind::Import`].
    pub fn import(
        source_component_id: impl Into<String>,
        target_component_id: impl Into<String>,
    ) -> Self {
        Self::with_kind(
            source_component_id,
            target_component_id,
            CodewikiGraphEdgeKind::Import,
        )
    }

    /// Shared constructor: converts both endpoints into owned strings and attaches `kind`.
    fn with_kind(
        source: impl Into<String>,
        target: impl Into<String>,
        kind: CodewikiGraphEdgeKind,
    ) -> Self {
        Self {
            source_component_id: source.into(),
            target_component_id: target.into(),
            kind,
        }
    }
}

/// The relationship a [`CodewikiGraphEdge`] represents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodewikiGraphEdgeKind {
    /// Edge produced by [`CodewikiGraphEdge::call`].
    Call,
    /// Edge produced by [`CodewikiGraphEdge::import`].
    Import,
}
111
112#[derive(Debug, Clone)]
113pub(crate) struct CodewikiGraph {
114 edges: Vec<CodewikiGraphEdge>,
115 availability: CodewikiGraphAvailability,
116}
117
118impl CodewikiGraph {
119 fn available(edges: Vec<CodewikiGraphEdge>) -> Self {
120 Self {
121 edges,
122 availability: CodewikiGraphAvailability::Available,
123 }
124 }
125
126 fn truncated(edges: Vec<CodewikiGraphEdge>) -> Self {
127 Self {
128 edges,
129 availability: CodewikiGraphAvailability::Truncated,
130 }
131 }
132
133 fn unavailable() -> Self {
134 Self {
135 edges: Vec::new(),
136 availability: CodewikiGraphAvailability::Unavailable,
137 }
138 }
139}
140
/// How complete the graph edges handed to the generator are.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodewikiGraphAvailability {
    /// Marker set by `CodewikiGraph::available`.
    Available,
    /// Marker set by `CodewikiGraph::truncated`, which still carries an edge list.
    // NOTE(review): presumably means the edge limit was hit — confirm in the `graph` module.
    Truncated,
    /// Marker set by `CodewikiGraph::unavailable`, which carries no edges.
    Unavailable,
}
147
148#[derive(Debug, Clone)]
149pub(crate) struct FileDoc {
150 path: String,
151 module: String,
152 summary: String,
153 source_spans: Vec<SourceSpan>,
154 symbols: Vec<SymbolDoc>,
155 component_ids: Vec<String>,
156}
157
158#[derive(Debug, Clone)]
159pub(crate) struct SymbolDoc {
160 symbol: Symbol,
161 purpose: String,
162 component_id: String,
163 component_label: String,
164 source_span: SourceSpan,
165}
166
167#[derive(Debug, Clone)]
168pub(crate) struct ModuleDoc {
169 module: String,
170 summary: String,
171 source_spans: Vec<SourceSpan>,
172 direct_files: Vec<FileLink>,
173 child_modules: Vec<ModuleLink>,
174 component_ids: Vec<String>,
175 dependency_diagram: Option<String>,
176 call_diagram: Option<String>,
177 graph_availability: CodewikiGraphAvailability,
178}
179
180#[derive(Debug, Clone)]
181pub(crate) struct FileLink {
182 path: String,
183 summary: String,
184 source_spans: Vec<SourceSpan>,
185}
186
187#[derive(Debug, Clone)]
188pub(crate) struct ModuleLink {
189 module: String,
190 summary: String,
191 source_spans: Vec<SourceSpan>,
192}
193
/// A line range inside one file.
///
/// The derived ordering compares fields in declaration order: `file`, then `line_start`,
/// then `line_end`. Do not reorder the fields without checking every sort that relies on it.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub(crate) struct SourceSpan {
    /// Path of the file the span belongs to.
    file: String,
    /// First line of the span.
    line_start: usize,
    /// Last line of the span (`contains` treats both ends as inclusive).
    line_end: usize,
}
200
201#[derive(Debug, Clone, Serialize)]
202pub struct CodewikiRunSummary {
203 pub command: &'static str,
204 pub project_id: String,
205 pub project_root: String,
206 pub out_dir: String,
207 pub generated_pages: usize,
208 pub changed_paths: Vec<String>,
209 pub skipped: usize,
210 pub files: usize,
211 pub modules: usize,
212 pub symbols: usize,
213 pub ai_enabled: bool,
214}
215
216#[derive(Debug, Clone, Default, Deserialize, Serialize)]
217pub(crate) struct CodewikiMeta {
218 docs: BTreeMap<String, CodewikiDocMeta>,
219 generated_docs: Vec<String>,
220}
221
222#[derive(Debug, Clone, Default, Deserialize, Eq, PartialEq, Serialize)]
223pub(crate) struct CodewikiDocMeta {
224 source_hashes: BTreeMap<String, String>,
225}
226
/// Unsized callback type for the optional text-generation hook.
///
/// The callback receives two string slices and returns `Some(text)` or `None`. It is passed
/// around as `Option<&mut TextGenerator<'_>>` (see `generate_hierarchical_docs`).
// NOTE(review): the roles of the two arguments and the meaning of `None` are defined by the
// call sites in the `text` module — not visible from this file; confirm there.
pub type TextGenerator<'a> = dyn FnMut(&str, &str) -> Option<String> + 'a;
228
229impl SourceSpan {
230 fn from_symbol(symbol: &Symbol) -> Self {
231 Self {
232 file: symbol.file_path.clone(),
233 line_start: symbol.line_start,
234 line_end: symbol.line_end,
235 }
236 }
237
238 fn citation(&self) -> String {
239 if self.line_start == self.line_end {
240 format!("[{}:{}]", self.file, self.line_start)
241 } else {
242 format!("[{}:{}-{}]", self.file, self.line_start, self.line_end)
243 }
244 }
245
246 fn contains(&self, file: &str, line_start: usize, line_end: usize) -> bool {
247 self.file == file && self.line_start <= line_start && line_end <= self.line_end
248 }
249}
250
251pub fn run(
252 ctx: &Context,
253 out: Option<String>,
254 scope_args: Vec<String>,
255 ai: Option<AiRouting>,
256 edge_limit: usize,
257 format: Format,
258) -> anyhow::Result<()> {
259 validate_edge_limit(edge_limit)?;
260
261 let mut conn = db::connect_readonly(&ctx.database_url)?;
262 let scopes = scope_args
263 .iter()
264 .map(|value| scope::normalize_file_arg(ctx, value))
265 .collect::<Vec<_>>();
266 let files = visibility::visible_tree(&mut conn, ctx)?
267 .into_iter()
268 .map(|file| file.file_path)
269 .filter(|file| in_scope(file, &scopes))
270 .collect::<Vec<_>>();
271 let mut symbols = Vec::new();
272 for file in &files {
273 symbols.extend(visibility::visible_symbols_for_file(&mut conn, ctx, file)?);
274 }
275
276 let graph = fetch_codewiki_graph_edges(ctx, &files, &symbols, edge_limit)?;
277 let input = CodewikiInput {
278 files,
279 graph_edges: graph.edges,
280 graph_availability: graph.availability,
281 symbols,
282 };
283 let mut generator = resolve_text_generator(ctx, ai);
284 let ai_enabled = generator.is_some();
285 let docs = generate_hierarchical_docs(&input, generator.as_deref_mut());
286 let module_count = docs
287 .iter()
288 .filter(|(path, _)| path.starts_with("modules/"))
289 .count();
290 let file_count = docs
291 .iter()
292 .filter(|(path, _)| path.starts_with("files/"))
293 .count();
294 let symbol_count = input
295 .symbols
296 .iter()
297 .filter(|symbol| is_core_file(&symbol.file_path))
298 .count();
299 let out_dir = out.unwrap_or_else(|| DEFAULT_OUT_DIR.to_string());
300 let changed_paths = write_incremental_doc_set(&ctx.project_root, Path::new(&out_dir), &docs)?;
301 let generated_pages = docs.len();
302 let skipped = generated_pages.saturating_sub(changed_paths.len());
303
304 let summary = CodewikiRunSummary {
305 command: "codewiki",
306 project_id: ctx.project_id.clone(),
307 project_root: ctx.project_root.display().to_string(),
308 out_dir,
309 generated_pages,
310 changed_paths,
311 skipped,
312 files: file_count,
313 modules: module_count,
314 symbols: symbol_count,
315 ai_enabled,
316 };
317 match format {
318 Format::Json => output::print_json(&summary),
319 Format::Text => output::print_text(&format!(
320 "wrote {} file docs, {} module docs, and repo.md to {}",
321 summary.files, summary.modules, summary.out_dir
322 )),
323 }?;
324
325 Ok(())
326}
327
328fn validate_edge_limit(edge_limit: usize) -> anyhow::Result<()> {
329 if (1..=MAX_EDGE_LIMIT).contains(&edge_limit) {
330 return Ok(());
331 }
332 anyhow::bail!("codewiki --edge-limit must be between 1 and {MAX_EDGE_LIMIT}, got {edge_limit}")
333}
334
335pub fn generate_hierarchical_docs(
336 input: &CodewikiInput,
337 generate: Option<&mut TextGenerator<'_>>,
338) -> Vec<(String, String)> {
339 generate_hierarchical_docs_with_graph_availability(input, generate)
340}
341
342fn generate_hierarchical_docs_with_graph_availability(
343 input: &CodewikiInput,
344 mut generate: Option<&mut TextGenerator<'_>>,
345) -> Vec<(String, String)> {
346 let mut files = input
347 .files
348 .iter()
349 .filter(|file| is_core_file(file))
350 .cloned()
351 .collect::<BTreeSet<_>>();
352 for symbol in &input.symbols {
353 if is_core_file(&symbol.file_path) {
354 files.insert(symbol.file_path.clone());
355 }
356 }
357 let files = files.into_iter().collect::<Vec<_>>();
358
359 let mut symbols_by_file: BTreeMap<String, Vec<Symbol>> = BTreeMap::new();
360 for symbol in &input.symbols {
361 if !is_core_file(&symbol.file_path) {
362 continue;
363 }
364 symbols_by_file
365 .entry(symbol.file_path.clone())
366 .or_default()
367 .push(symbol.clone());
368 }
369 for symbols in symbols_by_file.values_mut() {
370 symbols.sort_by_key(|symbol| (symbol.line_start, symbol.byte_start, symbol.name.clone()));
371 }
372
373 let file_modules = cluster_file_modules(&files, &symbols_by_file, &input.graph_edges);
374 let file_docs = files
375 .iter()
376 .map(|file| {
377 build_file_doc(
378 file,
379 file_modules
380 .get(file)
381 .cloned()
382 .unwrap_or_else(|| module_for_file(file)),
383 symbols_by_file.remove(file).unwrap_or_default(),
384 &mut generate,
385 )
386 })
387 .collect::<Vec<_>>();
388 let module_docs = build_module_docs(
389 &file_docs,
390 &input.graph_edges,
391 input.graph_availability,
392 &mut generate,
393 );
394 let repo_doc = build_repo_doc(&file_docs, &module_docs, &mut generate);
395
396 let mut docs = Vec::new();
397 docs.push(("repo.md".to_string(), repo_doc));
398 for module in &module_docs {
399 docs.push((module_doc_path(&module.module), render_module_doc(module)));
400 }
401 for file in &file_docs {
402 docs.push((file_doc_path(&file.path), render_file_doc(file)));
403 }
404 docs
405}
406
407#[cfg(test)]
408mod tests;