Skip to main content

gobby_code/commands/codewiki/
run.rs

1use std::collections::BTreeMap;
2use std::path::Path;
3
4use crate::commands::scope;
5use crate::config::{self, Context};
6use crate::db;
7use crate::models::Symbol;
8use crate::output::{self, Format};
9use crate::visibility;
10
11use super::{
12    BuiltDoc, CodewikiAiOptions, CodewikiInput, CodewikiProgress, CodewikiRunSummary,
13    DEFAULT_OUT_DIR, DocPruneScope, DocSink, LeadingChunk, MAX_EDGE_LIMIT, ReusePlan,
14    build_audit_context, build_codewiki_changes_doc, build_codewiki_index_snapshot,
15    build_feature_catalog_doc, build_system_model, build_truth_digest, fetch_codewiki_graph_edges,
16    generation, in_scope, io, is_core_file, read_ownership_meta, resolve_text_generator,
17    resolve_text_verifier, write_ownership_meta, write_truth_digest,
18};
19
20// CLI entry point: each parameter maps to a distinct codewiki flag, so the
21// argument count tracks the command surface rather than hidden coupling.
22#[allow(clippy::too_many_arguments)]
23pub fn run(
24    ctx: &Context,
25    out: Option<String>,
26    scope_args: Vec<String>,
27    ai: CodewikiAiOptions,
28    edge_limit: usize,
29    include_docs: bool,
30    since: Option<String>,
31    format: Format,
32    verbose: bool,
33) -> anyhow::Result<()> {
34    validate_edge_limit(edge_limit)?;
35    let ai_depth = ai.depth;
36
37    let mut progress = CodewikiProgress::stderr(verbose && !ctx.quiet);
38
39    let mut conn = db::connect_readonly(&ctx.database_url)?;
40    let scopes = scope_args
41        .iter()
42        .map(|value| scope::normalize_file_arg(ctx, value))
43        .collect::<Vec<_>>();
44    progress.emit("loading indexed files");
45    let files = visibility::visible_tree(&mut conn, ctx)?
46        .into_iter()
47        .filter(|file| should_document_file(&file.file_path, include_docs))
48        .map(|file| file.file_path)
49        .filter(|file| in_scope(file, &scopes))
50        .collect::<Vec<_>>();
51    let symbols = load_symbols_for_codewiki(&files, &mut progress, |paths| {
52        visibility::visible_symbols_for_files(&mut conn, ctx, paths)
53    })?;
54
55    progress.emit(format!(
56        "fetching graph edges for {} files and {} symbols (limit {})",
57        files.len(),
58        symbols.len(),
59        edge_limit
60    ));
61    progress.emit("loading leading content chunks");
62    let leading_chunks = load_leading_chunks(&mut conn, ctx, &files)?;
63
64    let graph = fetch_codewiki_graph_edges(ctx, &files, &symbols, edge_limit)?;
65    let input = CodewikiInput {
66        files,
67        graph_edges: graph.edges,
68        graph_availability: graph.availability,
69        symbols,
70        leading_chunks,
71    };
72    // Deterministic workspace model (#891), read straight off the project's
73    // Cargo manifests. Seeds the architecture page's model-derived Mermaid
74    // diagrams. A partial/empty model simply omits diagrams — never an error.
75    let system_model = build_system_model(&ctx.project_root);
76    // Deterministic feature catalog (#888), built from the pinned CLI contract
77    // JSONs + dispatch resolver. Read straight off the project root; missing or
78    // unparseable contracts simply omit that binary's section — never an error.
79    let feature_catalog = build_feature_catalog_doc(&ctx.project_root, &input.files);
80    // Deterministic audit context (#889): scans the documented source for
81    // deprecation markers and the test-gated symbol set. Drives the per-symbol
82    // deprecation badge, the `code/deprecations.md` page, and the file page's
83    // test-count collapse. Read straight off the project root; unreadable files
84    // are skipped — never an error, never degrading.
85    let audit_context = build_audit_context(&ctx.project_root, &input);
86    let mut generator = resolve_text_generator(ctx, &ai);
87    let mut verifier = resolve_text_verifier(ctx, &ai);
88    let ai_enabled = generator.is_some();
89    let ai_mode = if ai_enabled {
90        ai_depth.mode_label()
91    } else {
92        "off"
93    };
94    let out_dir = out.unwrap_or_else(|| DEFAULT_OUT_DIR.to_string());
95    let out_path = Path::new(&out_dir);
96    let doc_scope = DocPruneScope::from_scopes(&scopes);
97    // `--since <ref>` scopes regeneration to the files git reports changed since
98    // the ref plus their dependents, instead of a full content-hash scan of
99    // every page (Leaf H, #893). A source page whose own sources and neighbors
100    // are all unchanged-since-ref is left exactly as it is; keyed aggregate
101    // pages (architecture/infrastructure/features/audit) still re-check their
102    // model digest, so a manifest/contract change rebuilds them even here.
103    let since_changed = match since.as_deref() {
104        Some(since_ref) => {
105            progress.emit(format!("scoping to git changes since {since_ref}"));
106            Some(git_changed_files(&ctx.project_root, since_ref)?)
107        }
108        None => None,
109    };
110    if doc_scope.is_unscoped() {
111        progress.emit("reading metadata and hashing snapshot");
112    } else {
113        progress.emit("reading metadata for scoped write");
114    }
115    let previous_meta = if doc_scope.is_unscoped() {
116        Some(io::read_codewiki_meta(out_path)?)
117    } else {
118        None
119    };
120    let index_snapshot = if doc_scope.is_unscoped() {
121        Some(build_codewiki_index_snapshot(&ctx.project_root, &input)?)
122    } else {
123        None
124    };
125    let mut ownership_meta = if doc_scope.is_unscoped() {
126        Some(read_ownership_meta(out_path)?)
127    } else {
128        None
129    };
130    let mut reuse_plan =
131        ReusePlan::load_with_since(&ctx.project_root, out_path, ai_mode, since_changed.clone())?;
132    let mut reuse = Some(&mut reuse_plan);
133    let mut sink =
134        DocSink::open_with_prune_scope(&ctx.project_root, out_path, ai_mode, doc_scope.clone())?
135            .with_since(since_changed);
136    let mut generated_pages = 0_usize;
137    let mut module_count = 0_usize;
138    let mut file_count = 0_usize;
139    // Persist each doc and its meta entry as soon as it is built, so a killed
140    // run keeps everything generated so far and a re-run resumes from disk.
141    let mut emit = |doc: BuiltDoc| -> anyhow::Result<()> {
142        generated_pages += 1;
143        if doc.path.starts_with("code/modules/") {
144            module_count += 1;
145        }
146        if doc.path.starts_with("code/files/") {
147            file_count += 1;
148        }
149        sink.persist(&doc)?;
150        Ok(())
151    };
152    generation::generate_hierarchical_docs_with_ownership(
153        &input,
154        ownership_meta
155            .as_mut()
156            .map(|meta| (ctx.project_root.as_path(), meta)),
157        Some(&system_model),
158        feature_catalog.as_ref(),
159        Some(&audit_context),
160        generator.as_deref_mut(),
161        verifier.as_deref_mut(),
162        ai_depth,
163        &mut reuse,
164        &mut progress,
165        &doc_scope,
166        &mut emit,
167    )?;
168    if let Some(index_snapshot) = index_snapshot.as_ref() {
169        progress.emit("generating changes docs");
170        emit(BuiltDoc::healthy(
171            "code/_changes.md",
172            build_codewiki_changes_doc(
173                previous_meta
174                    .as_ref()
175                    .and_then(|meta| meta.index_snapshot.as_ref()),
176                index_snapshot,
177            )?,
178        ))?;
179    }
180    if let Some(ownership_meta) = ownership_meta.as_ref() {
181        write_ownership_meta(out_path, ownership_meta)?;
182    }
183    let symbol_count = input
184        .symbols
185        .iter()
186        .filter(|symbol| is_core_file(&symbol.file_path))
187        .count();
188    // Surface degraded pages (a failed AI pass fell back to the structural
189    // body, #900) instead of letting them hide silently in the meta cache. Read
190    // before `finish` consumes the sink.
191    let degraded_pages = sink.degraded_docs().to_vec();
192    if !degraded_pages.is_empty() && !ctx.quiet {
193        // Warn on stderr at parity with the per-file "text generation failed ...
194        // record degraded: true" line (text/generation.rs), so a degraded
195        // curated/aggregate pass is visible regardless of --verbose rather than
196        // only summarized in the run result.
197        eprintln!(
198            "codewiki: {} page(s) degraded to structural fallback (AI content \
199             pass failed): {}",
200            degraded_pages.len(),
201            degraded_pages.join(", ")
202        );
203    }
204    let changed_paths = sink.finish(index_snapshot)?;
205    let skipped = generated_pages.saturating_sub(changed_paths.len());
206    if doc_scope.is_unscoped() {
207        let truth_digest =
208            build_truth_digest(&system_model, &ctx.project_id, file_count, module_count);
209        write_truth_digest(out_path, &doc_scope, &truth_digest)?;
210    }
211
212    let summary = CodewikiRunSummary {
213        command: "codewiki",
214        project_id: ctx.project_id.clone(),
215        project_root: ctx.project_root.display().to_string(),
216        out_dir,
217        generated_pages,
218        changed_paths,
219        skipped,
220        files: file_count,
221        modules: module_count,
222        symbols: symbol_count,
223        ai_enabled,
224        degraded_pages,
225    };
226    match format {
227        Format::Json => output::print_json(&summary),
228        Format::Text => {
229            if doc_scope.is_unscoped() {
230                output::print_text(&format!(
231                    "wrote {} file docs, {} module docs, and repo.md to {}",
232                    summary.files, summary.modules, summary.out_dir
233                ))
234            } else {
235                output::print_text(&format!(
236                    "wrote {} scoped file docs and {} scoped module docs to {}",
237                    summary.files, summary.modules, summary.out_dir
238                ))
239            }
240        }
241    }?;
242
243    Ok(())
244}
245
246/// Repair-only entry for `codewiki --repair-citations`: re-anchors every
247/// generated page's `[file:line]` citations against the current index and
248/// rewrites only the pages whose citations changed. No generation, no AI/LLM
249/// calls. Loads the full visible symbol set (like [`run`]) so a citation to any
250/// indexed file can resolve, then prints the [`super::CitationRepairSummary`].
251pub fn run_repair(ctx: &Context, out: Option<String>, format: Format) -> anyhow::Result<()> {
252    let mut conn = db::connect_readonly(&ctx.database_url)?;
253    let files = visibility::visible_tree(&mut conn, ctx)?
254        .into_iter()
255        .map(|file| file.file_path)
256        .collect::<Vec<_>>();
257    let symbols = visibility::visible_symbols_for_files(&mut conn, ctx, &files)?;
258    let out_dir = out.unwrap_or_else(|| DEFAULT_OUT_DIR.to_string());
259    let summary = super::repair_citations(Path::new(&out_dir), &symbols)?;
260    match format {
261        Format::Json => output::print_json(&summary),
262        Format::Text => output::print_text(&format!(
263            "scanned {} pages; repaired {} pages, {} citations; {} unresolved",
264            summary.pages_scanned,
265            summary.pages_repaired,
266            summary.citations_repaired,
267            summary.citations_unresolved,
268        )),
269    }?;
270    Ok(())
271}
272
273pub(crate) fn validate_edge_limit(edge_limit: usize) -> anyhow::Result<()> {
274    if (1..=MAX_EDGE_LIMIT).contains(&edge_limit) {
275        return Ok(());
276    }
277    anyhow::bail!("codewiki --edge-limit must be between 1 and {MAX_EDGE_LIMIT}, got {edge_limit}")
278}
279
280/// Repo-relative paths git reports changed between `since_ref` and the working
281/// tree — the change set that drives `--since` incremental regeneration (Leaf H,
282/// #893). An invalid ref or a missing git binary is surfaced as an error rather
283/// than silently falling back to a full scan, so a typo'd `--since` fails loudly.
284pub(crate) fn git_changed_files(
285    project_root: &Path,
286    since_ref: &str,
287) -> anyhow::Result<std::collections::BTreeSet<String>> {
288    let output = std::process::Command::new("git")
289        .arg("-C")
290        .arg(project_root)
291        .args(["diff", "--name-only", "--relative", since_ref])
292        .output()
293        .map_err(|err| anyhow::anyhow!("failed to run git diff for --since {since_ref}: {err}"))?;
294    if !output.status.success() {
295        anyhow::bail!(
296            "git diff --name-only --relative {since_ref} failed: {}",
297            String::from_utf8_lossy(&output.stderr).trim()
298        );
299    }
300    Ok(String::from_utf8_lossy(&output.stdout)
301        .lines()
302        .map(str::trim)
303        .filter(|line| !line.is_empty())
304        .map(str::to_string)
305        .collect())
306}
307
308/// codewiki documents code and structured config — any file the indexer
309/// recognizes as an AST or json/yaml language. Content-only files (markdown,
310/// plain text, license/lock files) are gwiki's domain, so codewiki skips them.
311fn documents_file(file_path: &str) -> bool {
312    crate::index::languages::detect_language(file_path).is_some()
313}
314
315/// Whether codewiki should emit a file doc for `file_path`. Content-only files
316/// are skipped unless the caller opts back in with `--include-docs`.
317pub(crate) fn should_document_file(file_path: &str, include_docs: bool) -> bool {
318    include_docs || documents_file(file_path)
319}
320
321pub(crate) fn load_symbols_for_codewiki(
322    files: &[String],
323    progress: &mut CodewikiProgress,
324    mut load_symbols: impl FnMut(&[String]) -> anyhow::Result<Vec<Symbol>>,
325) -> anyhow::Result<Vec<Symbol>> {
326    progress.emit(format!("loading symbols for {} files", files.len()));
327    load_symbols(files)
328}
329
330/// Loads each file's first indexed content chunk (`chunk_index = 0`) from the
331/// hub. Overlay scopes prefer overlay rows and fall back to the parent
332/// project for files the overlay has not re-indexed.
333fn load_leading_chunks(
334    conn: &mut postgres::Client,
335    ctx: &Context,
336    files: &[String],
337) -> anyhow::Result<BTreeMap<String, LeadingChunk>> {
338    let mut chunks = BTreeMap::new();
339    if files.is_empty() {
340        return Ok(chunks);
341    }
342    let project_ids = match &ctx.index_scope {
343        config::ProjectIndexScope::Single => vec![ctx.project_id.clone()],
344        config::ProjectIndexScope::Overlay {
345            overlay_project_id,
346            parent_project_id,
347            ..
348        } => vec![overlay_project_id.clone(), parent_project_id.clone()],
349    };
350    for project_id in project_ids {
351        let rows = conn.query(
352            "SELECT file_path,
353                    line_start::BIGINT AS line_start,
354                    line_end::BIGINT AS line_end,
355                    content
356             FROM code_content_chunks
357             WHERE project_id = $1 AND file_path = ANY($2) AND chunk_index = 0",
358            &[&project_id, &files],
359        )?;
360        for row in rows {
361            let file_path: String = row.get("file_path");
362            if chunks.contains_key(&file_path) {
363                continue;
364            }
365            let line_start: i64 = row.get("line_start");
366            let line_end: i64 = row.get("line_end");
367            let content: String = row.get("content");
368            chunks.insert(
369                file_path,
370                LeadingChunk {
371                    content,
372                    line_start: usize::try_from(line_start).unwrap_or(0),
373                    line_end: usize::try_from(line_end).unwrap_or(0),
374                },
375            );
376        }
377    }
378    Ok(chunks)
379}