1use std::collections::BTreeMap;
2use std::path::Path;
3
4use crate::commands::scope;
5use crate::config::{self, Context};
6use crate::db;
7use crate::models::Symbol;
8use crate::output::{self, Format};
9use crate::visibility;
10
11use super::{
12 BuiltDoc, CodewikiAiOptions, CodewikiInput, CodewikiProgress, CodewikiRunSummary,
13 DEFAULT_OUT_DIR, DocPruneScope, DocSink, LeadingChunk, MAX_EDGE_LIMIT, ReusePlan,
14 build_audit_context, build_codewiki_changes_doc, build_codewiki_index_snapshot,
15 build_feature_catalog_doc, build_system_model, build_truth_digest, fetch_codewiki_graph_edges,
16 generation, in_scope, io, is_core_file, read_ownership_meta, resolve_text_generator,
17 resolve_text_verifier, write_ownership_meta, write_truth_digest,
18};
19
20#[allow(clippy::too_many_arguments)]
23pub fn run(
24 ctx: &Context,
25 out: Option<String>,
26 scope_args: Vec<String>,
27 ai: CodewikiAiOptions,
28 edge_limit: usize,
29 include_docs: bool,
30 since: Option<String>,
31 format: Format,
32 verbose: bool,
33) -> anyhow::Result<()> {
34 validate_edge_limit(edge_limit)?;
35 let ai_depth = ai.depth;
36
37 let mut progress = CodewikiProgress::stderr(verbose && !ctx.quiet);
38
39 let mut conn = db::connect_readonly(&ctx.database_url)?;
40 let scopes = scope_args
41 .iter()
42 .map(|value| scope::normalize_file_arg(ctx, value))
43 .collect::<Vec<_>>();
44 progress.emit("loading indexed files");
45 let files = visibility::visible_tree(&mut conn, ctx)?
46 .into_iter()
47 .filter(|file| should_document_file(&file.file_path, include_docs))
48 .map(|file| file.file_path)
49 .filter(|file| in_scope(file, &scopes))
50 .collect::<Vec<_>>();
51 let symbols = load_symbols_for_codewiki(&files, &mut progress, |paths| {
52 visibility::visible_symbols_for_files(&mut conn, ctx, paths)
53 })?;
54
55 progress.emit(format!(
56 "fetching graph edges for {} files and {} symbols (limit {})",
57 files.len(),
58 symbols.len(),
59 edge_limit
60 ));
61 progress.emit("loading leading content chunks");
62 let leading_chunks = load_leading_chunks(&mut conn, ctx, &files)?;
63
64 let graph = fetch_codewiki_graph_edges(ctx, &files, &symbols, edge_limit)?;
65 let input = CodewikiInput {
66 files,
67 graph_edges: graph.edges,
68 graph_availability: graph.availability,
69 symbols,
70 leading_chunks,
71 };
72 let system_model = build_system_model(&ctx.project_root);
76 let feature_catalog = build_feature_catalog_doc(&ctx.project_root, &input.files);
80 let audit_context = build_audit_context(&ctx.project_root, &input);
86 let mut generator = resolve_text_generator(ctx, &ai);
87 let mut verifier = resolve_text_verifier(ctx, &ai);
88 let ai_enabled = generator.is_some();
89 let ai_mode = if ai_enabled {
90 ai_depth.mode_label()
91 } else {
92 "off"
93 };
94 let out_dir = out.unwrap_or_else(|| DEFAULT_OUT_DIR.to_string());
95 let out_path = Path::new(&out_dir);
96 let doc_scope = DocPruneScope::from_scopes(&scopes);
97 let since_changed = match since.as_deref() {
104 Some(since_ref) => {
105 progress.emit(format!("scoping to git changes since {since_ref}"));
106 Some(git_changed_files(&ctx.project_root, since_ref)?)
107 }
108 None => None,
109 };
110 if doc_scope.is_unscoped() {
111 progress.emit("reading metadata and hashing snapshot");
112 } else {
113 progress.emit("reading metadata for scoped write");
114 }
115 let previous_meta = if doc_scope.is_unscoped() {
116 Some(io::read_codewiki_meta(out_path)?)
117 } else {
118 None
119 };
120 let index_snapshot = if doc_scope.is_unscoped() {
121 Some(build_codewiki_index_snapshot(&ctx.project_root, &input)?)
122 } else {
123 None
124 };
125 let mut ownership_meta = if doc_scope.is_unscoped() {
126 Some(read_ownership_meta(out_path)?)
127 } else {
128 None
129 };
130 let mut reuse_plan =
131 ReusePlan::load_with_since(&ctx.project_root, out_path, ai_mode, since_changed.clone())?;
132 let mut reuse = Some(&mut reuse_plan);
133 let mut sink =
134 DocSink::open_with_prune_scope(&ctx.project_root, out_path, ai_mode, doc_scope.clone())?
135 .with_since(since_changed);
136 let mut generated_pages = 0_usize;
137 let mut module_count = 0_usize;
138 let mut file_count = 0_usize;
139 let mut emit = |doc: BuiltDoc| -> anyhow::Result<()> {
142 generated_pages += 1;
143 if doc.path.starts_with("code/modules/") {
144 module_count += 1;
145 }
146 if doc.path.starts_with("code/files/") {
147 file_count += 1;
148 }
149 sink.persist(&doc)?;
150 Ok(())
151 };
152 generation::generate_hierarchical_docs_with_ownership(
153 &input,
154 ownership_meta
155 .as_mut()
156 .map(|meta| (ctx.project_root.as_path(), meta)),
157 Some(&system_model),
158 feature_catalog.as_ref(),
159 Some(&audit_context),
160 generator.as_deref_mut(),
161 verifier.as_deref_mut(),
162 ai_depth,
163 &mut reuse,
164 &mut progress,
165 &doc_scope,
166 &mut emit,
167 )?;
168 if let Some(index_snapshot) = index_snapshot.as_ref() {
169 progress.emit("generating changes docs");
170 emit(BuiltDoc::healthy(
171 "code/_changes.md",
172 build_codewiki_changes_doc(
173 previous_meta
174 .as_ref()
175 .and_then(|meta| meta.index_snapshot.as_ref()),
176 index_snapshot,
177 )?,
178 ))?;
179 }
180 if let Some(ownership_meta) = ownership_meta.as_ref() {
181 write_ownership_meta(out_path, ownership_meta)?;
182 }
183 let symbol_count = input
184 .symbols
185 .iter()
186 .filter(|symbol| is_core_file(&symbol.file_path))
187 .count();
188 let degraded_pages = sink.degraded_docs().to_vec();
192 if !degraded_pages.is_empty() && !ctx.quiet {
193 eprintln!(
198 "codewiki: {} page(s) degraded to structural fallback (AI content \
199 pass failed): {}",
200 degraded_pages.len(),
201 degraded_pages.join(", ")
202 );
203 }
204 let changed_paths = sink.finish(index_snapshot)?;
205 let skipped = generated_pages.saturating_sub(changed_paths.len());
206 if doc_scope.is_unscoped() {
207 let truth_digest =
208 build_truth_digest(&system_model, &ctx.project_id, file_count, module_count);
209 write_truth_digest(out_path, &doc_scope, &truth_digest)?;
210 }
211
212 let summary = CodewikiRunSummary {
213 command: "codewiki",
214 project_id: ctx.project_id.clone(),
215 project_root: ctx.project_root.display().to_string(),
216 out_dir,
217 generated_pages,
218 changed_paths,
219 skipped,
220 files: file_count,
221 modules: module_count,
222 symbols: symbol_count,
223 ai_enabled,
224 degraded_pages,
225 };
226 match format {
227 Format::Json => output::print_json(&summary),
228 Format::Text => {
229 if doc_scope.is_unscoped() {
230 output::print_text(&format!(
231 "wrote {} file docs, {} module docs, and repo.md to {}",
232 summary.files, summary.modules, summary.out_dir
233 ))
234 } else {
235 output::print_text(&format!(
236 "wrote {} scoped file docs and {} scoped module docs to {}",
237 summary.files, summary.modules, summary.out_dir
238 ))
239 }
240 }
241 }?;
242
243 Ok(())
244}
245
246pub fn run_repair(ctx: &Context, out: Option<String>, format: Format) -> anyhow::Result<()> {
252 let mut conn = db::connect_readonly(&ctx.database_url)?;
253 let files = visibility::visible_tree(&mut conn, ctx)?
254 .into_iter()
255 .map(|file| file.file_path)
256 .collect::<Vec<_>>();
257 let symbols = visibility::visible_symbols_for_files(&mut conn, ctx, &files)?;
258 let out_dir = out.unwrap_or_else(|| DEFAULT_OUT_DIR.to_string());
259 let summary = super::repair_citations(Path::new(&out_dir), &symbols)?;
260 match format {
261 Format::Json => output::print_json(&summary),
262 Format::Text => output::print_text(&format!(
263 "scanned {} pages; repaired {} pages, {} citations; {} unresolved",
264 summary.pages_scanned,
265 summary.pages_repaired,
266 summary.citations_repaired,
267 summary.citations_unresolved,
268 )),
269 }?;
270 Ok(())
271}
272
273pub(crate) fn validate_edge_limit(edge_limit: usize) -> anyhow::Result<()> {
274 if (1..=MAX_EDGE_LIMIT).contains(&edge_limit) {
275 return Ok(());
276 }
277 anyhow::bail!("codewiki --edge-limit must be between 1 and {MAX_EDGE_LIMIT}, got {edge_limit}")
278}
279
280pub(crate) fn git_changed_files(
285 project_root: &Path,
286 since_ref: &str,
287) -> anyhow::Result<std::collections::BTreeSet<String>> {
288 let output = std::process::Command::new("git")
289 .arg("-C")
290 .arg(project_root)
291 .args(["diff", "--name-only", "--relative", since_ref])
292 .output()
293 .map_err(|err| anyhow::anyhow!("failed to run git diff for --since {since_ref}: {err}"))?;
294 if !output.status.success() {
295 anyhow::bail!(
296 "git diff --name-only --relative {since_ref} failed: {}",
297 String::from_utf8_lossy(&output.stderr).trim()
298 );
299 }
300 Ok(String::from_utf8_lossy(&output.stdout)
301 .lines()
302 .map(str::trim)
303 .filter(|line| !line.is_empty())
304 .map(str::to_string)
305 .collect())
306}
307
308fn documents_file(file_path: &str) -> bool {
312 crate::index::languages::detect_language(file_path).is_some()
313}
314
315pub(crate) fn should_document_file(file_path: &str, include_docs: bool) -> bool {
318 include_docs || documents_file(file_path)
319}
320
321pub(crate) fn load_symbols_for_codewiki(
322 files: &[String],
323 progress: &mut CodewikiProgress,
324 mut load_symbols: impl FnMut(&[String]) -> anyhow::Result<Vec<Symbol>>,
325) -> anyhow::Result<Vec<Symbol>> {
326 progress.emit(format!("loading symbols for {} files", files.len()));
327 load_symbols(files)
328}
329
330fn load_leading_chunks(
334 conn: &mut postgres::Client,
335 ctx: &Context,
336 files: &[String],
337) -> anyhow::Result<BTreeMap<String, LeadingChunk>> {
338 let mut chunks = BTreeMap::new();
339 if files.is_empty() {
340 return Ok(chunks);
341 }
342 let project_ids = match &ctx.index_scope {
343 config::ProjectIndexScope::Single => vec![ctx.project_id.clone()],
344 config::ProjectIndexScope::Overlay {
345 overlay_project_id,
346 parent_project_id,
347 ..
348 } => vec![overlay_project_id.clone(), parent_project_id.clone()],
349 };
350 for project_id in project_ids {
351 let rows = conn.query(
352 "SELECT file_path,
353 line_start::BIGINT AS line_start,
354 line_end::BIGINT AS line_end,
355 content
356 FROM code_content_chunks
357 WHERE project_id = $1 AND file_path = ANY($2) AND chunk_index = 0",
358 &[&project_id, &files],
359 )?;
360 for row in rows {
361 let file_path: String = row.get("file_path");
362 if chunks.contains_key(&file_path) {
363 continue;
364 }
365 let line_start: i64 = row.get("line_start");
366 let line_end: i64 = row.get("line_end");
367 let content: String = row.get("content");
368 chunks.insert(
369 file_path,
370 LeadingChunk {
371 content,
372 line_start: usize::try_from(line_start).unwrap_or(0),
373 line_end: usize::try_from(line_end).unwrap_or(0),
374 },
375 );
376 }
377 }
378 Ok(chunks)
379}