// gobby_code/commands/codewiki/generation.rs

1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::fmt::Write as _;
3use std::path::Path;
4
5use crate::index::hasher;
6use crate::models::Symbol;
7
8use super::{
9    AiDepth, AuditContext, BuiltDoc, CodewikiGraphEdge, CodewikiGraphEdgeKind, CodewikiInput,
10    CodewikiProgress, DocPruneScope, FeatureCatalogDoc, FileDoc, FileDocPosition, LeadingChunk,
11    ModuleDoc, OwnershipMeta, OwnershipOptions, ReusePlan, SourceSpan, SystemModel, TextGenerator,
12    TextVerifier, build_architecture_doc, build_curated_navigation_docs, build_deprecations_doc,
13    build_file_doc, build_hotspots_doc, build_infrastructure_doc, build_module_docs_with_filter,
14    build_onboarding_doc, build_ownership_doc, build_repo_doc, cluster, cluster_file_modules,
15    file_doc_path, is_core_file, module_doc_path, module_for_file, relationship_facts_for_file,
16    render_architecture_doc, render_deprecations_doc, render_feature_catalog_doc, render_file_doc,
17    render_hotspots_doc, render_infrastructure_doc, render_module_doc, render_onboarding_doc,
18    span_files,
19};
20
21pub fn generate_hierarchical_docs(
22    input: &CodewikiInput,
23    generate: Option<&mut TextGenerator<'_>>,
24) -> Vec<(String, String)> {
25    generate_hierarchical_docs_with_graph_availability(input, generate)
26        .into_iter()
27        .map(|doc| (doc.path, doc.content))
28        .collect()
29}
30
31fn generate_hierarchical_docs_with_graph_availability(
32    input: &CodewikiInput,
33    mut generate: Option<&mut TextGenerator<'_>>,
34) -> Vec<BuiltDoc> {
35    let mut progress = CodewikiProgress::silent();
36    let doc_scope = DocPruneScope::unscoped();
37    let mut docs = Vec::new();
38    if let Err(error) = generate_hierarchical_docs_core(
39        input,
40        None,
41        None,
42        None,
43        None,
44        &mut generate,
45        &mut None,
46        AiDepth::Symbols,
47        &mut None,
48        &mut progress,
49        &doc_scope,
50        &mut |doc| {
51            docs.push(doc);
52            Ok(())
53        },
54    ) {
55        log::warn!("codewiki generation failed without ownership metadata: {error}");
56        return Vec::new();
57    }
58    docs
59}
60
61#[expect(clippy::too_many_arguments)]
62pub(crate) fn generate_hierarchical_docs_with_ownership(
63    input: &CodewikiInput,
64    ownership: Option<(&Path, &mut OwnershipMeta)>,
65    system_model: Option<&SystemModel>,
66    feature_catalog: Option<&FeatureCatalogDoc>,
67    audit: Option<&AuditContext>,
68    mut generate: Option<&mut TextGenerator<'_>>,
69    mut verify: Option<&mut TextVerifier<'_>>,
70    ai_depth: AiDepth,
71    reuse: &mut Option<&mut ReusePlan>,
72    progress: &mut CodewikiProgress,
73    doc_scope: &DocPruneScope,
74    emit: &mut dyn FnMut(BuiltDoc) -> anyhow::Result<()>,
75) -> anyhow::Result<()> {
76    generate_hierarchical_docs_core(
77        input,
78        ownership,
79        system_model,
80        feature_catalog,
81        audit,
82        &mut generate,
83        &mut verify,
84        ai_depth,
85        reuse,
86        progress,
87        doc_scope,
88        emit,
89    )
90}
91
/// Test entry point that generates docs at the requested AI depth while
/// reporting through a caller-supplied progress sink; no reuse plan is used.
#[cfg(test)]
pub(crate) fn generate_hierarchical_docs_with_progress(
    input: &CodewikiInput,
    generate: Option<&mut TextGenerator<'_>>,
    ai_depth: AiDepth,
    progress: &mut CodewikiProgress,
) -> Vec<BuiltDoc> {
    let mut no_reuse: Option<&mut ReusePlan> = None;
    generate_hierarchical_docs_with_reuse(input, generate, ai_depth, &mut no_reuse, progress)
}
101
/// Test entry point for the reuse path that does not need the CLI runtime.
///
/// The core generator is driven with an unscoped [`DocPruneScope`] and without
/// ownership, system model, feature catalog, audit context, or verifier. The
/// emitted pages are gathered into the returned vector; on failure the error
/// is logged as a warning and an empty vector is returned.
#[cfg(test)]
pub(crate) fn generate_hierarchical_docs_with_reuse(
    input: &CodewikiInput,
    mut generate: Option<&mut TextGenerator<'_>>,
    ai_depth: AiDepth,
    reuse: &mut Option<&mut ReusePlan>,
    progress: &mut CodewikiProgress,
) -> Vec<BuiltDoc> {
    let scope = DocPruneScope::unscoped();
    let mut collected = Vec::new();
    let outcome = generate_hierarchical_docs_core(
        input,
        None,
        None,
        None,
        None,
        &mut generate,
        &mut None,
        ai_depth,
        reuse,
        progress,
        &scope,
        &mut |page| {
            collected.push(page);
            Ok(())
        },
    );
    match outcome {
        Ok(()) => collected,
        Err(error) => {
            log::warn!("codewiki generation failed without ownership metadata: {error}");
            Vec::new()
        }
    }
}
135
/// Test entry point that passes a verifier to the core generator together with
/// the text generator, so verification can be driven end-to-end without the
/// CLI runtime.
///
/// Progress is silent, the scope is unscoped, and no reuse plan, ownership,
/// system model, feature catalog, or audit context is supplied. On failure the
/// error is logged as a warning and an empty vector is returned.
#[cfg(test)]
pub(crate) fn generate_hierarchical_docs_with_verify(
    input: &CodewikiInput,
    mut generate: Option<&mut TextGenerator<'_>>,
    mut verify: Option<&mut TextVerifier<'_>>,
    ai_depth: AiDepth,
) -> Vec<BuiltDoc> {
    let mut silent = CodewikiProgress::silent();
    let scope = DocPruneScope::unscoped();
    let mut collected = Vec::new();
    let outcome = generate_hierarchical_docs_core(
        input,
        None,
        None,
        None,
        None,
        &mut generate,
        &mut verify,
        ai_depth,
        &mut None,
        &mut silent,
        &scope,
        &mut |page| {
            collected.push(page);
            Ok(())
        },
    );
    match outcome {
        Ok(()) => collected,
        Err(error) => {
            log::warn!("codewiki generation failed without ownership metadata: {error}");
            Vec::new()
        }
    }
}
173
/// Builds the reference-appendix links for the deterministic analysis/catalog
/// pages (#904).
///
/// Each flag says whether the matching page exists in this run; only pages
/// flagged `true` are linked, so the repo overview never points at a page
/// that is absent. The result is a list of `(label, wikilink-target)` pairs in
/// a fixed order: feature catalog, infrastructure stack, deprecations.
fn repo_audit_links(
    has_audit: bool,
    has_feature_catalog: bool,
    has_infrastructure: bool,
) -> Vec<(&'static str, &'static str)> {
    let candidates: [(bool, (&'static str, &'static str)); 3] = [
        (has_feature_catalog, ("Feature catalog", "code/features")),
        (
            has_infrastructure,
            ("Infrastructure stack", "code/infrastructure"),
        ),
        (has_audit, ("Deprecations", "code/deprecations")),
    ];
    candidates
        .iter()
        .filter_map(|&(present, link)| present.then_some(link))
        .collect()
}
195
196#[expect(
197    clippy::too_many_arguments,
198    reason = "core generation threads mutable generator, verifier, reuse, progress, scope, and emit state"
199)]
200pub(crate) fn generate_hierarchical_docs_core(
201    input: &CodewikiInput,
202    ownership: Option<(&Path, &mut OwnershipMeta)>,
203    // Deterministic workspace system model (#891). Seeds the architecture
204    // page's model-derived Mermaid diagrams. The CLI runtime passes the real
205    // model built from the project root; test/AI-off entry points pass `None`
206    // to omit the diagram section.
207    system_model: Option<&SystemModel>,
208    // Deterministic feature catalog (#888), built from the pinned CLI contract
209    // JSONs + dispatch resolver. The CLI runtime passes the real catalog; the
210    // test/AI-off entry points pass `None` to omit the catalog page, exactly
211    // like `system_model`.
212    feature_catalog: Option<&FeatureCatalogDoc>,
213    // Deterministic audit context (#889): the deprecation index (stamped into
214    // each file doc's symbols for the badge + the `code/deprecations.md` page)
215    // and the test-gated symbol index (for the file page's test-count collapse).
216    // The CLI runtime passes the real context; test/AI-off entry points pass
217    // `None` to omit the deprecations page, exactly like `system_model`.
218    audit: Option<&AuditContext>,
219    generate: &mut Option<&mut TextGenerator<'_>>,
220    verify: &mut Option<&mut TextVerifier<'_>>,
221    ai_depth: AiDepth,
222    reuse: &mut Option<&mut ReusePlan>,
223    progress: &mut CodewikiProgress,
224    doc_scope: &DocPruneScope,
225    emit: &mut dyn FnMut(BuiltDoc) -> anyhow::Result<()>,
226) -> anyhow::Result<()> {
227    let mut files = input
228        .files
229        .iter()
230        .filter(|file| is_core_file(file) && doc_scope.includes_file(file))
231        .cloned()
232        .collect::<BTreeSet<_>>();
233    for symbol in &input.symbols {
234        if is_core_file(&symbol.file_path) && doc_scope.includes_file(&symbol.file_path) {
235            files.insert(symbol.file_path.clone());
236        }
237    }
238    let files = files.into_iter().collect::<Vec<_>>();
239
240    let mut symbols_by_file: BTreeMap<String, Vec<Symbol>> = BTreeMap::new();
241    for symbol in &input.symbols {
242        if !is_core_file(&symbol.file_path) || !doc_scope.includes_file(&symbol.file_path) {
243            continue;
244        }
245        symbols_by_file
246            .entry(symbol.file_path.clone())
247            .or_default()
248            .push(symbol.clone());
249    }
250    for symbols in symbols_by_file.values_mut() {
251        symbols.sort_by_key(|symbol| (symbol.line_start, symbol.byte_start, symbol.name.clone()));
252    }
253
254    let file_modules = cluster_file_modules(&files, &symbols_by_file, &input.graph_edges);
255    // Resolve graph-edge endpoints (symbol component ids) back to their symbols
256    // so each file's narrative can name concrete cross-file collaborators (#885).
257    let symbols_by_id = input
258        .symbols
259        .iter()
260        .map(|symbol| (symbol.id.as_str(), symbol))
261        .collect::<HashMap<&str, &Symbol>>();
262    let file_verb = if ai_depth.includes_files() {
263        "generating"
264    } else {
265        "building"
266    };
267    progress.emit(format!("{file_verb} file docs for {} files", files.len()));
268    let file_total = files.len();
269    let mut file_docs = Vec::with_capacity(file_total);
270    for (index, file) in files.iter().enumerate() {
271        let file_symbols = symbols_by_file.remove(file).unwrap_or_default();
272        // Cross-file relationships are derived before the symbols are moved into
273        // the file doc; the id set borrows them only within this block.
274        let relationships = {
275            let file_symbol_ids = file_symbols
276                .iter()
277                .map(|symbol| symbol.id.as_str())
278                .collect::<HashSet<&str>>();
279            relationship_facts_for_file(file, &file_symbol_ids, &symbols_by_id, &input.graph_edges)
280        };
281        let file_doc = build_file_doc(
282            file,
283            file_modules
284                .get(file)
285                .cloned()
286                .unwrap_or_else(|| module_for_file(file)),
287            file_symbols,
288            input.leading_chunks.get(file),
289            &relationships,
290            audit.map(|audit| &audit.deprecations),
291            audit.map(|audit| &audit.tests),
292            generate,
293            verify,
294            reuse,
295            ai_depth,
296            progress,
297            FileDocPosition {
298                index: index + 1,
299                total: file_total,
300            },
301        );
302        emit(
303            BuiltDoc {
304                path: file_doc_path(&file_doc.path),
305                content: file_doc
306                    .reused_page
307                    .clone()
308                    .unwrap_or_else(|| render_file_doc(&file_doc)),
309                degraded: file_doc.degraded,
310                summary: Some(file_doc.summary.clone()),
311                neighbors: BTreeSet::new(),
312                invalidation_key: None,
313                invalidation_key_requires_sources: false,
314            }
315            // Record the cross-file neighbor set so a caller/import-target edit
316            // invalidates this page on the next run (#885, Leaf H).
317            .with_neighbors(relationships.neighbor_files(file)),
318        )?;
319        file_docs.push(file_doc);
320    }
321    progress.emit("generating module docs");
322    let module_docs = build_module_docs_with_filter(
323        &file_docs,
324        &input.leading_chunks,
325        &input.graph_edges,
326        generate,
327        reuse,
328        progress,
329        &|module| doc_scope.includes_module(module),
330        &mut |module| {
331            emit(BuiltDoc {
332                path: module_doc_path(&module.module),
333                content: module
334                    .reused_page
335                    .clone()
336                    .unwrap_or_else(|| render_module_doc(module)),
337                degraded: module.degraded,
338                summary: Some(module.summary.clone()),
339                // A module aggregate invalidates through its member files'
340                // source hashes (member-set + members hash), recorded as the
341                // page's provenance — no separate key or neighbor set needed.
342                neighbors: BTreeSet::new(),
343                invalidation_key: None,
344                invalidation_key_requires_sources: false,
345            })
346        },
347    )?;
348    if !doc_scope.is_unscoped() {
349        return Ok(());
350    }
351    for doc in build_curated_navigation_docs(
352        &file_docs,
353        &module_docs,
354        &input.leading_chunks,
355        generate,
356        verify,
357        reuse,
358        progress,
359    ) {
360        emit(doc)?;
361    }
362    // Audit/analysis pages are deterministic, input-gated projections (#904).
363    // Build the infrastructure page once here (reused at its emission site
364    // below) and link every page that will actually be emitted into the repo
365    // overview's appendix, so they are reachable instead of orphaned.
366    let infrastructure_doc = build_infrastructure_doc(system_model);
367    let audit_links = repo_audit_links(
368        audit.is_some(),
369        feature_catalog.is_some(),
370        infrastructure_doc.is_some(),
371    );
372    let (repo_doc, repo_degraded, repo_key) = build_repo_doc(
373        &file_docs,
374        &module_docs,
375        &input.leading_chunks,
376        &audit_links,
377        generate,
378        reuse,
379        progress,
380    );
381    emit(
382        BuiltDoc {
383            path: "code/repo.md".to_string(),
384            content: repo_doc,
385            degraded: repo_degraded,
386            summary: None,
387            neighbors: BTreeSet::new(),
388            invalidation_key: Some(repo_key),
389            invalidation_key_requires_sources: true,
390        }
391        .with_source_sensitive_key(),
392    )?;
393    progress.emit("generating architecture docs");
394    // Architecture is keyed by the SystemModel plus architecture prompt inputs:
395    // a function-body edit leaves it alone, while graph/prose evidence changes
396    // rebuild it. Test/AI-off entry points pass no model and fall back to the
397    // old full source-set reuse.
398    let architecture_key = system_model.map(|model| {
399        architecture_invalidation_key(
400            model,
401            &file_docs,
402            &module_docs,
403            &input.graph_edges,
404            &input.leading_chunks,
405        )
406    });
407    let infrastructure_key = system_model.map(infrastructure_invalidation_key);
408    let subsystem_names = cluster::subsystem_roots(&files);
409    let architecture_sources = span_files(
410        &module_docs
411            .iter()
412            .filter(|module| subsystem_names.contains(&module.module))
413            .flat_map(|module| module.source_spans.iter().cloned())
414            .collect::<Vec<_>>(),
415    );
416    let reused_architecture = match architecture_key.as_deref() {
417        Some(key) => reuse
418            .as_deref_mut()
419            .and_then(|plan| plan.reusable_page_keyed("code/_architecture.md", key)),
420        None => reuse
421            .as_deref_mut()
422            .and_then(|plan| plan.reusable_page("code/_architecture.md", &architecture_sources)),
423    };
424    let architecture_built = match reused_architecture {
425        Some(page) => {
426            progress.emit("reusing architecture docs (system model unchanged)");
427            match architecture_key.clone() {
428                Some(key) => BuiltDoc::derived("code/_architecture.md", page, key),
429                None => BuiltDoc::healthy("code/_architecture.md", page),
430            }
431        }
432        None => {
433            let architecture_doc = build_architecture_doc(
434                &file_docs,
435                &module_docs,
436                &input.graph_edges,
437                &input.leading_chunks,
438                system_model,
439                generate,
440                progress,
441            );
442            BuiltDoc {
443                path: "code/_architecture.md".to_string(),
444                content: render_architecture_doc(&architecture_doc),
445                degraded: architecture_doc
446                    .degraded_sources
447                    .iter()
448                    .any(|source| source == "model-unavailable"),
449                summary: None,
450                neighbors: BTreeSet::new(),
451                invalidation_key: architecture_key.clone(),
452                invalidation_key_requires_sources: false,
453            }
454        }
455    };
456    emit(architecture_built)?;
457    // Deterministic infra-stack page (#892). Built straight from the workspace
458    // system model + curated descriptors — no LLM, never degraded. Omitted when
459    // no model was supplied (AI-off / test entry points), exactly like the
460    // architecture diagrams.
461    progress.emit("generating infrastructure docs");
462    if let Some(infrastructure_doc) = infrastructure_doc {
463        let content = render_infrastructure_doc(&infrastructure_doc);
464        emit(match infrastructure_key.clone() {
465            Some(key) => BuiltDoc::derived("code/infrastructure.md", content, key),
466            None => BuiltDoc::healthy("code/infrastructure.md", content),
467        })?;
468    }
469    // Deterministic feature catalog page (#888). Built straight from the pinned
470    // CLI contract JSONs + dispatch resolver — no LLM, never degraded. Omitted
471    // when no catalog was supplied (AI-off / test entry points), exactly like
472    // the architecture diagrams and the infrastructure stack page.
473    progress.emit("generating feature catalog");
474    if let Some(catalog) = feature_catalog {
475        let content = render_feature_catalog_doc(catalog);
476        // Faithful "contract hash" (Leaf H, #893): the feature catalog render is
477        // a pure, deterministic projection of the pinned CLI contract, so a
478        // digest of its output changes exactly when the contract surface does —
479        // a function-body edit leaves it untouched.
480        let key = hasher::content_hash(content.as_bytes());
481        emit(BuiltDoc::derived("code/features.md", content, key))?;
482    }
483    // Deterministic audit page (#889): the deprecation aggregate. Built straight
484    // from the source scan — no LLM, NEVER degraded. Omitted when no audit
485    // context was supplied (AI-off / test entry points), exactly like the
486    // feature catalog.
487    if let Some(audit) = audit {
488        // Faithful "deprecation-set hash" (Leaf H, #893): the page is a
489        // deterministic projection of the deprecation scan, so a digest of its
490        // rendered output invalidates exactly on those input changes.
491        progress.emit("generating deprecations docs");
492        let deprecations =
493            render_deprecations_doc(&build_deprecations_doc(input, &audit.deprecations));
494        let deprecations_key = hasher::content_hash(deprecations.as_bytes());
495        emit(BuiltDoc::derived(
496            "code/deprecations.md",
497            deprecations,
498            deprecations_key,
499        ))?;
500    }
501    progress.emit("generating onboarding docs");
502    let onboarding_doc = build_onboarding_doc(
503        &file_docs,
504        &module_docs,
505        &input.graph_edges,
506        input.graph_availability,
507    );
508    emit(BuiltDoc::healthy(
509        "code/_onboarding.md",
510        render_onboarding_doc(&onboarding_doc),
511    ))?;
512    progress.emit("generating hotspots docs");
513    let hotspots_doc = build_hotspots_doc(&file_docs, &input.graph_edges, input.graph_availability);
514    emit(BuiltDoc::healthy(
515        "code/_hotspots.md",
516        render_hotspots_doc(&hotspots_doc),
517    ))?;
518    if let Some((project_root, ownership_meta)) = ownership {
519        progress.emit("generating ownership docs");
520        emit(BuiltDoc::healthy(
521            "code/_ownership.md",
522            build_ownership_doc(
523                project_root,
524                &files,
525                &file_modules,
526                ownership_meta,
527                OwnershipOptions::default(),
528            )?,
529        ))?;
530    }
531    Ok(())
532}
533
534fn architecture_invalidation_key(
535    system_model: &SystemModel,
536    file_docs: &[FileDoc],
537    module_docs: &[ModuleDoc],
538    graph_edges: &[CodewikiGraphEdge],
539    leading_chunks: &BTreeMap<String, LeadingChunk>,
540) -> String {
541    let mut key = String::from("architecture:v2\n");
542    let _ = writeln!(key, "system={}", system_model.digest());
543
544    for file in file_docs {
545        let _ = writeln!(
546            key,
547            "file\t{}\t{}\t{}",
548            file.path, file.module, file.summary
549        );
550        for span in &file.source_spans {
551            push_span_key(&mut key, "file-span", span);
552        }
553        for component_id in &file.component_ids {
554            let _ = writeln!(key, "file-component\t{}\t{}", file.path, component_id);
555        }
556        for symbol in &file.symbols {
557            let _ = writeln!(
558                key,
559                "symbol\t{}\t{}\t{}\t{}",
560                file.path, symbol.component_label, symbol.component_id, symbol.purpose
561            );
562        }
563    }
564
565    for module in module_docs {
566        let _ = writeln!(key, "module\t{}\t{}", module.module, module.summary);
567        for span in &module.source_spans {
568            push_span_key(&mut key, "module-span", span);
569        }
570        for file in &module.direct_files {
571            let _ = writeln!(
572                key,
573                "module-file\t{}\t{}\t{}",
574                module.module, file.path, file.summary
575            );
576        }
577        for child in &module.child_modules {
578            let _ = writeln!(
579                key,
580                "module-child\t{}\t{}\t{}",
581                module.module, child.module, child.summary
582            );
583        }
584    }
585
586    let mut edges = graph_edges.iter().collect::<Vec<_>>();
587    edges.sort_by(|left, right| {
588        edge_kind_key(&left.kind)
589            .cmp(edge_kind_key(&right.kind))
590            .then_with(|| left.source_component_id.cmp(&right.source_component_id))
591            .then_with(|| left.target_component_id.cmp(&right.target_component_id))
592    });
593    for edge in edges {
594        let _ = writeln!(
595            key,
596            "edge\t{}\t{}\t{}",
597            edge_kind_key(&edge.kind),
598            edge.source_component_id,
599            edge.target_component_id
600        );
601    }
602
603    for (path, chunk) in leading_chunks {
604        let chunk_hash = hasher::content_hash(chunk.content.as_bytes());
605        let _ = writeln!(
606            key,
607            "leading\t{}\t{}\t{}\t{}",
608            path, chunk.line_start, chunk.line_end, chunk_hash
609        );
610    }
611
612    format!("architecture:{}", hasher::content_hash(key.as_bytes()))
613}
614
615fn infrastructure_invalidation_key(system_model: &SystemModel) -> String {
616    format!("infrastructure:{}", system_model.digest())
617}
618
619fn push_span_key(out: &mut String, prefix: &str, span: &SourceSpan) {
620    let _ = writeln!(
621        out,
622        "{}\t{}\t{}\t{}",
623        prefix, span.file, span.line_start, span.line_end
624    );
625}
626
627fn edge_kind_key(kind: &CodewikiGraphEdgeKind) -> &'static str {
628    match kind {
629        CodewikiGraphEdgeKind::Call => "call",
630        CodewikiGraphEdgeKind::Import => "import",
631    }
632}