use std::collections::BTreeMap;
use std::path::Path;
use crate::commands::scope;
use crate::config::{self, Context};
use crate::db;
use crate::models::Symbol;
use crate::output::{self, Format};
use crate::visibility;
use super::{
BuiltDoc, CodewikiAiOptions, CodewikiInput, CodewikiProgress, CodewikiRunSummary,
DEFAULT_OUT_DIR, DocPruneScope, DocSink, LeadingChunk, MAX_EDGE_LIMIT, ReusePlan,
build_audit_context, build_codewiki_changes_doc, build_codewiki_index_snapshot,
build_feature_catalog_doc, build_system_model, build_truth_digest, fetch_codewiki_graph_edges,
generation, in_scope, io, is_core_file, read_ownership_meta, resolve_text_generator,
resolve_text_verifier, write_ownership_meta, write_truth_digest,
};
/// Runs the `codewiki` command: gathers indexed files, symbols, leading
/// content chunks and graph edges, generates the documentation pages through a
/// [`DocSink`], and prints a run summary.
///
/// # Arguments
/// * `ctx` - project context (database URL, project root and id, quiet flag).
/// * `out` - output directory; `DEFAULT_OUT_DIR` is used when `None`.
/// * `scope_args` - file arguments, normalized with `scope::normalize_file_arg`
///   and used both to filter the file list and to build the prune scope.
/// * `ai` - options used to resolve the text generator and verifier; `ai.depth`
///   provides the AI mode label when a generator is available.
/// * `edge_limit` - graph edge limit; must lie in `1..=MAX_EDGE_LIMIT`.
/// * `include_docs` - when `true`, files without a detected language are kept.
/// * `since` - optional git ref; the files changed since it are handed to the
///   reuse plan and to the sink.
/// * `format` - selects JSON or plain-text summary output.
/// * `verbose` - progress reporting is requested only when this is set and
///   `ctx.quiet` is not.
///
/// # Errors
/// Returns the first error raised by any step (edge-limit validation, database
/// access, git, metadata I/O, generation, persistence, or printing).
// The signature takes nine parameters, which is above clippy's default limit.
#[allow(clippy::too_many_arguments)]
pub fn run(
    ctx: &Context,
    out: Option<String>,
    scope_args: Vec<String>,
    ai: CodewikiAiOptions,
    edge_limit: usize,
    include_docs: bool,
    since: Option<String>,
    format: Format,
    verbose: bool,
) -> anyhow::Result<()> {
    validate_edge_limit(edge_limit)?;
    let ai_depth = ai.depth;
    let mut progress = CodewikiProgress::stderr(verbose && !ctx.quiet);
    let mut conn = db::connect_readonly(&ctx.database_url)?;
    let scopes = scope_args
        .iter()
        .map(|value| scope::normalize_file_arg(ctx, value))
        .collect::<Vec<_>>();

    progress.emit("loading indexed files");
    let files = visibility::visible_tree(&mut conn, ctx)?
        .into_iter()
        .filter(|file| should_document_file(&file.file_path, include_docs))
        .map(|file| file.file_path)
        .filter(|file| in_scope(file, &scopes))
        .collect::<Vec<_>>();
    let symbols = load_symbols_for_codewiki(&files, &mut progress, |paths| {
        visibility::visible_symbols_for_files(&mut conn, ctx, paths)
    })?;

    // Each progress message is emitted right before the step it describes, so
    // the last line shown always names the work that is actually in flight.
    progress.emit("loading leading content chunks");
    let leading_chunks = load_leading_chunks(&mut conn, ctx, &files)?;
    progress.emit(format!(
        "fetching graph edges for {} files and {} symbols (limit {})",
        files.len(),
        symbols.len(),
        edge_limit
    ));
    let graph = fetch_codewiki_graph_edges(ctx, &files, &symbols, edge_limit)?;

    let input = CodewikiInput {
        files,
        graph_edges: graph.edges,
        graph_availability: graph.availability,
        symbols,
        leading_chunks,
    };
    let system_model = build_system_model(&ctx.project_root);
    let feature_catalog = build_feature_catalog_doc(&ctx.project_root, &input.files);
    let audit_context = build_audit_context(&ctx.project_root, &input);
    let mut generator = resolve_text_generator(ctx, &ai);
    let mut verifier = resolve_text_verifier(ctx, &ai);
    // AI counts as enabled only when a generator could be resolved.
    let ai_enabled = generator.is_some();
    let ai_mode = if ai_enabled {
        ai_depth.mode_label()
    } else {
        "off"
    };
    let out_dir = out.unwrap_or_else(|| DEFAULT_OUT_DIR.to_string());
    let out_path = Path::new(&out_dir);
    let doc_scope = DocPruneScope::from_scopes(&scopes);
    let since_changed = match since.as_deref() {
        Some(since_ref) => {
            progress.emit(format!("scoping to git changes since {since_ref}"));
            Some(git_changed_files(&ctx.project_root, since_ref)?)
        }
        None => None,
    };

    if doc_scope.is_unscoped() {
        progress.emit("reading metadata and hashing snapshot");
    } else {
        progress.emit("reading metadata for scoped write");
    }
    // Previous metadata, the index snapshot and the ownership metadata are
    // only loaded or built for an unscoped run.
    let previous_meta = if doc_scope.is_unscoped() {
        Some(io::read_codewiki_meta(out_path)?)
    } else {
        None
    };
    let index_snapshot = if doc_scope.is_unscoped() {
        Some(build_codewiki_index_snapshot(&ctx.project_root, &input)?)
    } else {
        None
    };
    let mut ownership_meta = if doc_scope.is_unscoped() {
        Some(read_ownership_meta(out_path)?)
    } else {
        None
    };

    let mut reuse_plan =
        ReusePlan::load_with_since(&ctx.project_root, out_path, ai_mode, since_changed.clone())?;
    let mut reuse = Some(&mut reuse_plan);
    let mut sink =
        DocSink::open_with_prune_scope(&ctx.project_root, out_path, ai_mode, doc_scope.clone())?
            .with_since(since_changed);

    let mut generated_pages = 0_usize;
    let mut module_count = 0_usize;
    let mut file_count = 0_usize;
    // Counts every emitted page (and module/file pages by path prefix) before
    // handing the page to the sink.
    let mut emit = |doc: BuiltDoc| -> anyhow::Result<()> {
        generated_pages += 1;
        if doc.path.starts_with("code/modules/") {
            module_count += 1;
        }
        if doc.path.starts_with("code/files/") {
            file_count += 1;
        }
        sink.persist(&doc)?;
        Ok(())
    };

    generation::generate_hierarchical_docs_with_ownership(
        &input,
        ownership_meta
            .as_mut()
            .map(|meta| (ctx.project_root.as_path(), meta)),
        Some(&system_model),
        feature_catalog.as_ref(),
        Some(&audit_context),
        generator.as_deref_mut(),
        verifier.as_deref_mut(),
        ai_depth,
        &mut reuse,
        &mut progress,
        &doc_scope,
        &mut emit,
    )?;

    // The changes page needs the index snapshot, so it exists only for
    // unscoped runs.
    if let Some(index_snapshot) = index_snapshot.as_ref() {
        progress.emit("generating changes docs");
        emit(BuiltDoc::healthy(
            "code/_changes.md",
            build_codewiki_changes_doc(
                previous_meta
                    .as_ref()
                    .and_then(|meta| meta.index_snapshot.as_ref()),
                index_snapshot,
            )?,
        ))?;
    }
    if let Some(ownership_meta) = ownership_meta.as_ref() {
        write_ownership_meta(out_path, ownership_meta)?;
    }

    let symbol_count = input
        .symbols
        .iter()
        .filter(|symbol| is_core_file(&symbol.file_path))
        .count();
    let degraded_pages = sink.degraded_docs().to_vec();
    if !degraded_pages.is_empty() && !ctx.quiet {
        eprintln!(
            "codewiki: {} page(s) degraded to structural fallback (AI content \
             pass failed): {}",
            degraded_pages.len(),
            degraded_pages.join(", ")
        );
    }
    let changed_paths = sink.finish(index_snapshot)?;
    // Pages that were generated but not reported as changed by the sink.
    let skipped = generated_pages.saturating_sub(changed_paths.len());
    if doc_scope.is_unscoped() {
        let truth_digest =
            build_truth_digest(&system_model, &ctx.project_id, file_count, module_count);
        write_truth_digest(out_path, &doc_scope, &truth_digest)?;
    }

    let summary = CodewikiRunSummary {
        command: "codewiki",
        project_id: ctx.project_id.clone(),
        project_root: ctx.project_root.display().to_string(),
        out_dir,
        generated_pages,
        changed_paths,
        skipped,
        files: file_count,
        modules: module_count,
        symbols: symbol_count,
        ai_enabled,
        degraded_pages,
    };
    match format {
        Format::Json => output::print_json(&summary),
        Format::Text => {
            if doc_scope.is_unscoped() {
                output::print_text(&format!(
                    "wrote {} file docs, {} module docs, and repo.md to {}",
                    summary.files, summary.modules, summary.out_dir
                ))
            } else {
                output::print_text(&format!(
                    "wrote {} scoped file docs and {} scoped module docs to {}",
                    summary.files, summary.modules, summary.out_dir
                ))
            }
        }
    }?;
    Ok(())
}
/// Runs citation repair over the generated pages and prints a summary.
///
/// Loads every visible file and its symbols from the read-only database
/// connection, passes the symbols to `repair_citations` for the output
/// directory (`DEFAULT_OUT_DIR` when `out` is `None`), and reports the
/// resulting counters as JSON or text depending on `format`.
///
/// # Errors
/// Returns any error from the database, the repair pass, or printing.
pub fn run_repair(ctx: &Context, out: Option<String>, format: Format) -> anyhow::Result<()> {
    let mut conn = db::connect_readonly(&ctx.database_url)?;

    let tree = visibility::visible_tree(&mut conn, ctx)?;
    let files: Vec<_> = tree.into_iter().map(|entry| entry.file_path).collect();
    let symbols = visibility::visible_symbols_for_files(&mut conn, ctx, &files)?;

    let out_dir = match out {
        Some(dir) => dir,
        None => DEFAULT_OUT_DIR.to_string(),
    };
    let summary = super::repair_citations(Path::new(&out_dir), &symbols)?;

    let printed = match format {
        Format::Text => output::print_text(&format!(
            "scanned {} pages; repaired {} pages, {} citations; {} unresolved",
            summary.pages_scanned,
            summary.pages_repaired,
            summary.citations_repaired,
            summary.citations_unresolved,
        )),
        Format::Json => output::print_json(&summary),
    };
    printed?;
    Ok(())
}
/// Checks that `edge_limit` lies within `1..=MAX_EDGE_LIMIT`.
///
/// # Errors
/// Fails with a message naming the allowed range and the rejected value when
/// `edge_limit` is zero or greater than `MAX_EDGE_LIMIT`.
pub(crate) fn validate_edge_limit(edge_limit: usize) -> anyhow::Result<()> {
    let out_of_range = edge_limit < 1 || edge_limit > MAX_EDGE_LIMIT;
    if out_of_range {
        anyhow::bail!(
            "codewiki --edge-limit must be between 1 and {MAX_EDGE_LIMIT}, got {edge_limit}"
        );
    }
    Ok(())
}
/// Returns the set of paths that `git diff` reports as changed relative to
/// `since_ref`.
///
/// Runs `git -C <project_root> diff --name-only --relative -z <since_ref> --`
/// and collects the NUL-separated path list from stdout.
///
/// # Arguments
/// * `project_root` - directory handed to `git -C`; with `--relative` the
///   reported paths are relative to it.
/// * `since_ref` - revision argument passed to `git diff`.
///
/// # Errors
/// Fails when `git` cannot be spawned, or when it exits unsuccessfully (the
/// trimmed stderr is included in the message).
pub(crate) fn git_changed_files(
    project_root: &Path,
    since_ref: &str,
) -> anyhow::Result<std::collections::BTreeSet<String>> {
    let output = std::process::Command::new("git")
        .arg("-C")
        .arg(project_root)
        // `-z` makes git emit raw, NUL-terminated paths. Without it, paths
        // with non-ASCII or special characters are C-quoted (for example
        // "caf\303\251.rs") and would never match an indexed file path.
        // The trailing `--` marks `since_ref` as a revision, so a ref that
        // shares its name with a file is not rejected as ambiguous.
        .args(["diff", "--name-only", "--relative", "-z", since_ref, "--"])
        .output()
        .map_err(|err| anyhow::anyhow!("failed to run git diff for --since {since_ref}: {err}"))?;
    if !output.status.success() {
        anyhow::bail!(
            "git diff --name-only --relative {since_ref} failed: {}",
            String::from_utf8_lossy(&output.stderr).trim()
        );
    }
    // Paths are taken verbatim (no trimming): leading or trailing whitespace
    // is part of the file name. The final NUL leaves an empty tail to drop.
    Ok(String::from_utf8_lossy(&output.stdout)
        .split('\0')
        .filter(|path| !path.is_empty())
        .map(str::to_string)
        .collect())
}
/// Reports whether `file_path` has a language recognized by
/// `detect_language`.
fn documents_file(file_path: &str) -> bool {
    let detected = crate::index::languages::detect_language(file_path);
    detected.is_some()
}
/// Decides whether a file takes part in documentation generation.
///
/// Every file qualifies when `include_docs` is set; otherwise only files
/// accepted by `documents_file` do.
pub(crate) fn should_document_file(file_path: &str, include_docs: bool) -> bool {
    if include_docs {
        return true;
    }
    documents_file(file_path)
}
/// Emits a progress message with the file count, then loads the symbols for
/// `files` through the supplied `load_symbols` callback.
///
/// # Errors
/// Returns whatever error the callback produces.
pub(crate) fn load_symbols_for_codewiki(
    files: &[String],
    progress: &mut CodewikiProgress,
    mut load_symbols: impl FnMut(&[String]) -> anyhow::Result<Vec<Symbol>>,
) -> anyhow::Result<Vec<Symbol>> {
    let file_total = files.len();
    let announcement = format!("loading symbols for {} files", file_total);
    progress.emit(announcement);
    load_symbols(files)
}
/// Loads the first content chunk (`chunk_index = 0`) of each file in `files`,
/// keyed by file path.
///
/// For an overlay index scope the overlay project is queried before its
/// parent, and the first row seen for a path is kept. Line numbers that do not
/// fit in `usize` are stored as `0`.
///
/// # Errors
/// Returns any error raised by the database query.
fn load_leading_chunks(
    conn: &mut postgres::Client,
    ctx: &Context,
    files: &[String],
) -> anyhow::Result<BTreeMap<String, LeadingChunk>> {
    // Nothing to look up: skip the database entirely.
    if files.is_empty() {
        return Ok(BTreeMap::new());
    }

    let project_ids = match &ctx.index_scope {
        config::ProjectIndexScope::Overlay {
            overlay_project_id: overlay,
            parent_project_id: parent,
            ..
        } => vec![overlay.clone(), parent.clone()],
        config::ProjectIndexScope::Single => vec![ctx.project_id.clone()],
    };

    // Negative line numbers cannot be represented; they collapse to 0.
    let to_line = |value: i64| usize::try_from(value).unwrap_or(0);

    let mut by_path: BTreeMap<String, LeadingChunk> = BTreeMap::new();
    for project_id in project_ids {
        let rows = conn.query(
            "SELECT file_path,
line_start::BIGINT AS line_start,
line_end::BIGINT AS line_end,
content
FROM code_content_chunks
WHERE project_id = $1 AND file_path = ANY($2) AND chunk_index = 0",
            &[&project_id, &files],
        )?;
        for row in rows {
            let path: String = row.get("file_path");
            // Earlier projects win: an existing entry is left untouched and
            // the remaining columns of this row are never read.
            by_path.entry(path).or_insert_with(|| {
                let first_line: i64 = row.get("line_start");
                let last_line: i64 = row.get("line_end");
                let content: String = row.get("content");
                LeadingChunk {
                    content,
                    line_start: to_line(first_line),
                    line_end: to_line(last_line),
                }
            });
        }
    }
    Ok(by_path)
}