Skip to main content

gobby_code/commands/
codewiki.rs

1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
2use std::fmt::Write as _;
3use std::path::{Path, PathBuf};
4
5use gobby_core::ai::{daemon::generate_via_daemon, effective_route, text::generate_text};
6use gobby_core::ai_context::{AiConfigSource, AiContext, AiContextOptions, PostgresAiConfigSource};
7use gobby_core::config::{AiCapability, AiRouting};
8use serde::{Deserialize, Serialize};
9
10use crate::commands::scope;
11use crate::config::{self, Context};
12use crate::db;
13use crate::falkor;
14use crate::index::hasher;
15use crate::models::Symbol;
16use crate::output::{self, Format};
17use crate::secrets;
18use crate::visibility;
19
/// Output directory used by `run` when the caller does not supply one.
const DEFAULT_OUT_DIR: &str = "codewiki";
/// Path of the incremental-run metadata file, relative to the output directory.
const CODEWIKI_META_PATH: &str = "_meta/codewiki.json";
// NOTE(review): presumably the maximum graph distance followed when building a
// Mermaid diagram; the use site is outside the visible part of the file — confirm.
const MAX_MERMAID_HOPS: usize = 2;
// NOTE(review): presumably the maximum number of edges drawn per Mermaid
// diagram; the use site is outside the visible part of the file — confirm.
const MAX_MERMAID_EDGES: usize = 20;
24
25mod prompts {
26    use std::fmt::Write as _;
27
28    use crate::models::Symbol;
29
30    pub const SYMBOL_SYSTEM: &str = "You write concise API reference notes. Return one sentence describing the symbol's purpose. Do not include markdown fences.";
31    pub const FILE_SYSTEM: &str = "You write concise file-level code documentation. Return a short purpose summary that reuses the supplied symbol summaries. Do not include markdown fences.";
32    pub const MODULE_SYSTEM: &str = "You write concise module overviews for code documentation. Return a short overview from the supplied child summaries. Do not include markdown fences.";
33    pub const REPO_SYSTEM: &str = "You write concise repository overviews for code documentation. Return a short overview from the supplied module summaries. Do not include markdown fences.";
34
35    pub fn symbol_prompt(symbol: &Symbol) -> String {
36        let mut prompt = format!(
37            "File: {}\nSymbol: {} [{}]\nLines: {}-{}",
38            symbol.file_path,
39            symbol.qualified_name,
40            symbol.kind,
41            symbol.line_start,
42            symbol.line_end
43        );
44        if let Some(signature) = symbol
45            .signature
46            .as_deref()
47            .filter(|value| !value.is_empty())
48        {
49            let _ = write!(prompt, "\nSignature: {signature}");
50        }
51        if let Some(docstring) = symbol
52            .docstring
53            .as_deref()
54            .filter(|value| !value.is_empty())
55        {
56            let _ = write!(prompt, "\nExisting docs: {docstring}");
57        }
58        prompt
59    }
60
61    pub fn file_prompt(file: &str, symbols: &[SymbolSummary]) -> String {
62        let mut prompt =
63            format!("Summarize this file once from its AST symbols.\n\nFile: {file}\n\nSymbols:\n");
64        if symbols.is_empty() {
65            prompt.push_str("- No indexed symbols.\n");
66        } else {
67            for symbol in symbols {
68                let _ = writeln!(
69                    prompt,
70                    "- {} [{}] component {} ({}) lines {}-{}: {}",
71                    symbol.name,
72                    symbol.kind,
73                    symbol.component_label,
74                    symbol.component_id,
75                    symbol.line_start,
76                    symbol.line_end,
77                    symbol.purpose
78                );
79            }
80        }
81        prompt
82    }
83
84    pub fn module_prompt(
85        module: &str,
86        files: &[ChildSummary],
87        modules: &[ChildSummary],
88        components: &[String],
89    ) -> String {
90        let mut prompt = format!(
91            "Summarize this module once from lower-level summaries.\n\nModule: {module}\n\nFiles:\n"
92        );
93        if files.is_empty() {
94            prompt.push_str("- No direct files.\n");
95        } else {
96            for file in files {
97                let _ = writeln!(prompt, "- {}: {}", file.name, file.summary);
98            }
99        }
100        prompt.push_str("\nChild modules:\n");
101        if modules.is_empty() {
102            prompt.push_str("- No child modules.\n");
103        } else {
104            for module in modules {
105                let _ = writeln!(prompt, "- {}: {}", module.name, module.summary);
106            }
107        }
108        prompt.push_str("\nStable component IDs:\n");
109        if components.is_empty() {
110            prompt.push_str("- No indexed components.\n");
111        } else {
112            for component in components {
113                let _ = writeln!(prompt, "- {component}");
114            }
115        }
116        prompt
117    }
118
119    pub fn repo_prompt(modules: &[ChildSummary], files: &[ChildSummary]) -> String {
120        let mut prompt =
121            "Summarize this repository once from module and root-file summaries.\n\nModules:\n"
122                .to_string();
123        if modules.is_empty() {
124            prompt.push_str("- No modules.\n");
125        } else {
126            for module in modules {
127                let _ = writeln!(prompt, "- {}: {}", module.name, module.summary);
128            }
129        }
130        prompt.push_str("\nRoot files:\n");
131        if files.is_empty() {
132            prompt.push_str("- No root files.\n");
133        } else {
134            for file in files {
135                let _ = writeln!(prompt, "- {}: {}", file.name, file.summary);
136            }
137        }
138        prompt
139    }
140
141    #[derive(Debug, Clone)]
142    pub struct SymbolSummary {
143        pub name: String,
144        pub kind: String,
145        pub component_id: String,
146        pub component_label: String,
147        pub line_start: usize,
148        pub line_end: usize,
149        pub purpose: String,
150    }
151
152    #[derive(Debug, Clone)]
153    pub struct ChildSummary {
154        pub name: String,
155        pub summary: String,
156    }
157}
158
/// Everything the documentation generator consumes for one run.
#[derive(Debug, Clone)]
pub struct CodewikiInput {
    /// Paths of the files to document.
    pub files: Vec<String>,
    /// Call/import edges between components.
    pub graph_edges: Vec<CodewikiGraphEdge>,
    /// Whether `graph_edges` came from a successful graph lookup.
    pub graph_availability: CodewikiGraphAvailability,
    /// Indexed symbols belonging to the files.
    pub symbols: Vec<Symbol>,
}
166
/// A directed relationship between two components, identified by component ID.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodewikiGraphEdge {
    pub source_component_id: String,
    pub target_component_id: String,
    pub kind: CodewikiGraphEdgeKind,
}

impl CodewikiGraphEdge {
    /// Shared constructor behind the public `call` / `import` helpers.
    fn with_kind(source: String, target: String, kind: CodewikiGraphEdgeKind) -> Self {
        Self {
            source_component_id: source,
            target_component_id: target,
            kind,
        }
    }

    /// Creates an edge of kind [`CodewikiGraphEdgeKind::Call`].
    pub fn call(
        source_component_id: impl Into<String>,
        target_component_id: impl Into<String>,
    ) -> Self {
        Self::with_kind(
            source_component_id.into(),
            target_component_id.into(),
            CodewikiGraphEdgeKind::Call,
        )
    }

    /// Creates an edge of kind [`CodewikiGraphEdgeKind::Import`].
    pub fn import(
        source_component_id: impl Into<String>,
        target_component_id: impl Into<String>,
    ) -> Self {
        Self::with_kind(
            source_component_id.into(),
            target_component_id.into(),
            CodewikiGraphEdgeKind::Import,
        )
    }
}

/// The relationship a [`CodewikiGraphEdge`] represents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodewikiGraphEdgeKind {
    Call,
    Import,
}
203
204#[derive(Debug, Clone)]
205struct CodewikiGraph {
206    edges: Vec<CodewikiGraphEdge>,
207    availability: CodewikiGraphAvailability,
208}
209
210impl CodewikiGraph {
211    fn available(edges: Vec<CodewikiGraphEdge>) -> Self {
212        Self {
213            edges,
214            availability: CodewikiGraphAvailability::Available,
215        }
216    }
217
218    fn unavailable() -> Self {
219        Self {
220            edges: Vec::new(),
221            availability: CodewikiGraphAvailability::Unavailable,
222        }
223    }
224}
225
/// Whether graph edges could be obtained for a run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodewikiGraphAvailability {
    /// The graph lookup completed (the edge list may still be empty).
    Available,
    /// No graph backend was configured, or connecting/querying failed.
    Unavailable,
}
231
/// Intermediate model of one file page, produced by `build_file_doc`.
#[derive(Debug, Clone)]
struct FileDoc {
    /// Path of the documented file.
    path: String,
    /// Module the file was assigned to.
    module: String,
    /// File-level summary text.
    summary: String,
    /// Source spans backing the summary.
    // NOTE(review): presumably one span per documented symbol — the struct
    // literal is cut off in the visible part of the file; confirm.
    source_spans: Vec<SourceSpan>,
    /// Per-symbol documentation entries.
    symbols: Vec<SymbolDoc>,
    /// Component IDs associated with the file.
    component_ids: Vec<String>,
}
241
/// Documentation entry for a single symbol within a file page.
#[derive(Debug, Clone)]
struct SymbolDoc {
    /// The indexed symbol being documented.
    symbol: Symbol,
    /// Purpose text: generated when a generator is available, otherwise the
    /// structural fallback, passed through `ground_text`.
    purpose: String,
    /// Value of `component_id(&symbol)`.
    component_id: String,
    /// Value of `component_label(&symbol)`.
    component_label: String,
    /// Span built with `SourceSpan::from_symbol`.
    source_span: SourceSpan,
}
250
/// Intermediate model of one module page.
// NOTE(review): the code that populates this struct is outside the visible
// part of the file; the field notes below are inferred from names and types
// only — confirm against `build_module_docs`.
#[derive(Debug, Clone)]
struct ModuleDoc {
    /// Module identifier; used to derive the page path via `module_doc_path`.
    module: String,
    /// Module-level summary text.
    summary: String,
    /// Source spans associated with the module.
    source_spans: Vec<SourceSpan>,
    /// Links to files that sit directly in this module (presumably).
    direct_files: Vec<FileLink>,
    /// Links to nested modules (presumably).
    child_modules: Vec<ModuleLink>,
    /// Component IDs associated with the module.
    component_ids: Vec<String>,
    /// Optional dependency diagram text (presumably Mermaid — confirm).
    dependency_diagram: Option<String>,
    /// Optional call diagram text (presumably Mermaid — confirm).
    call_diagram: Option<String>,
    /// Whether graph data was available when this page was built.
    graph_availability: CodewikiGraphAvailability,
}
263
/// Reference to a file page as listed on a module page.
// NOTE(review): construction is outside the visible part of the file; field
// meanings are inferred from names — confirm.
#[derive(Debug, Clone)]
struct FileLink {
    /// Path of the linked file.
    path: String,
    /// Summary text shown for the file.
    summary: String,
    /// Source spans associated with the file.
    source_spans: Vec<SourceSpan>,
}
270
/// Reference to a child module page as listed on a parent module page.
// NOTE(review): construction is outside the visible part of the file; field
// meanings are inferred from names — confirm.
#[derive(Debug, Clone)]
struct ModuleLink {
    /// Identifier of the linked module.
    module: String,
    /// Summary text shown for the module.
    summary: String,
    /// Source spans associated with the module.
    source_spans: Vec<SourceSpan>,
}
277
/// A line range within a source file.
///
/// The derived ordering compares `file`, then `line_start`, then `line_end`,
/// so the field order below is significant.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
struct SourceSpan {
    /// Path of the source file.
    file: String,
    /// First line of the span.
    line_start: usize,
    /// Last line of the span.
    line_end: usize,
}
284
/// Machine-readable result of one `codewiki` run; printed as JSON by `run`.
#[derive(Debug, Clone, Serialize)]
pub struct CodewikiRunSummary {
    /// Command name; `run` sets this to `"codewiki"`.
    pub command: &'static str,
    /// Project identifier copied from the context.
    pub project_id: String,
    /// Project root rendered as a display string.
    pub project_root: String,
    /// Output directory the pages were written to.
    pub out_dir: String,
    /// Total number of pages produced by the generator this run.
    pub generated_pages: usize,
    /// Relative paths of pages that were actually (re)written.
    pub changed_paths: Vec<String>,
    /// `generated_pages` minus the number of changed paths.
    pub skipped: usize,
    /// Number of pages whose path starts with `files/`.
    pub files: usize,
    /// Number of pages whose path starts with `modules/`.
    pub modules: usize,
    /// Number of input symbols that belong to core files.
    pub symbols: usize,
    /// Whether a text generator was resolved for this run.
    pub ai_enabled: bool,
}
299
/// Contents of the metadata file that drives incremental regeneration.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
struct CodewikiMeta {
    /// Per-page metadata keyed by the page's path relative to the output directory.
    docs: BTreeMap<String, CodewikiDocMeta>,
    /// Relative paths of the pages written during the run that produced this file.
    generated_docs: Vec<String>,
}
305
/// Change-detection record for one page; two records are compared for
/// equality to decide whether the page must be rewritten.
#[derive(Debug, Clone, Default, Deserialize, Eq, PartialEq, Serialize)]
struct CodewikiDocMeta {
    /// Content hash of each source file listed in the page's frontmatter,
    /// keyed by the file path as it appears there.
    source_hashes: BTreeMap<String, String>,
}
310
/// Callback that turns two string inputs into optional generated text.
// NOTE(review): the call sites pass a prompt followed by a system prompt to
// `maybe_generate`; the argument order seen by this callback is presumably the
// same — confirm against `maybe_generate`, which is not visible here.
pub type TextGenerator<'a> = dyn FnMut(&str, &str) -> Option<String> + 'a;
312
313pub fn run(
314    ctx: &Context,
315    out: Option<String>,
316    scope_args: Vec<String>,
317    ai: Option<AiRouting>,
318    format: Format,
319) -> anyhow::Result<()> {
320    let mut conn = db::connect_readonly(&ctx.database_url)?;
321    let scopes = scope_args
322        .iter()
323        .map(|value| scope::normalize_file_arg(ctx, value))
324        .collect::<Vec<_>>();
325    let files = visibility::visible_tree(&mut conn, ctx)?
326        .into_iter()
327        .map(|file| file.file_path)
328        .filter(|file| in_scope(file, &scopes))
329        .collect::<Vec<_>>();
330    let mut symbols = Vec::new();
331    for file in &files {
332        symbols.extend(visibility::visible_symbols_for_file(&mut conn, ctx, file)?);
333    }
334
335    let graph = fetch_codewiki_graph_edges(ctx, &files, &symbols)?;
336    let input = CodewikiInput {
337        files,
338        graph_edges: graph.edges,
339        graph_availability: graph.availability,
340        symbols,
341    };
342    let mut generator = resolve_text_generator(ctx, ai);
343    let ai_enabled = generator.is_some();
344    let docs = match generator.as_deref_mut() {
345        Some(generate) => generate_hierarchical_docs(&input, Some(generate)),
346        None => generate_hierarchical_docs(&input, None),
347    };
348    let module_count = docs
349        .iter()
350        .filter(|(path, _)| path.starts_with("modules/"))
351        .count();
352    let file_count = docs
353        .iter()
354        .filter(|(path, _)| path.starts_with("files/"))
355        .count();
356    let symbol_count = input
357        .symbols
358        .iter()
359        .filter(|symbol| is_core_file(&symbol.file_path))
360        .count();
361    let out_dir = out.unwrap_or_else(|| DEFAULT_OUT_DIR.to_string());
362    let changed_paths = write_incremental_doc_set(&ctx.project_root, Path::new(&out_dir), &docs)?;
363    let generated_pages = docs.len();
364    let skipped = generated_pages.saturating_sub(changed_paths.len());
365
366    let summary = CodewikiRunSummary {
367        command: "codewiki",
368        project_id: ctx.project_id.clone(),
369        project_root: ctx.project_root.display().to_string(),
370        out_dir,
371        generated_pages,
372        changed_paths,
373        skipped,
374        files: file_count,
375        modules: module_count,
376        symbols: symbol_count,
377        ai_enabled,
378    };
379    match format {
380        Format::Json => output::print_json(&summary),
381        Format::Text => output::print_text(&format!(
382            "wrote {} file docs, {} module docs, and repo.md to {}",
383            summary.files, summary.modules, summary.out_dir
384        )),
385    }
386}
387
/// Generates the full page set for `input`.
///
/// Returns `(relative_path, content)` pairs. This is the public entry point;
/// it forwards unchanged to
/// `generate_hierarchical_docs_with_graph_availability`.
pub fn generate_hierarchical_docs(
    input: &CodewikiInput,
    generate: Option<&mut TextGenerator<'_>>,
) -> Vec<(String, String)> {
    generate_hierarchical_docs_with_graph_availability(input, generate)
}
394
395fn generate_hierarchical_docs_with_graph_availability(
396    input: &CodewikiInput,
397    mut generate: Option<&mut TextGenerator<'_>>,
398) -> Vec<(String, String)> {
399    let mut files = input
400        .files
401        .iter()
402        .filter(|file| is_core_file(file))
403        .cloned()
404        .collect::<BTreeSet<_>>();
405    for symbol in &input.symbols {
406        if is_core_file(&symbol.file_path) {
407            files.insert(symbol.file_path.clone());
408        }
409    }
410    let files = files.into_iter().collect::<Vec<_>>();
411
412    let mut symbols_by_file: BTreeMap<String, Vec<Symbol>> = BTreeMap::new();
413    for symbol in &input.symbols {
414        if !is_core_file(&symbol.file_path) {
415            continue;
416        }
417        symbols_by_file
418            .entry(symbol.file_path.clone())
419            .or_default()
420            .push(symbol.clone());
421    }
422    for symbols in symbols_by_file.values_mut() {
423        symbols.sort_by_key(|symbol| (symbol.line_start, symbol.byte_start, symbol.name.clone()));
424    }
425
426    let file_modules = cluster_file_modules(&files, &symbols_by_file, &input.graph_edges);
427    let file_docs = files
428        .iter()
429        .map(|file| {
430            build_file_doc(
431                file,
432                file_modules
433                    .get(file)
434                    .cloned()
435                    .unwrap_or_else(|| module_for_file(file)),
436                symbols_by_file.remove(file).unwrap_or_default(),
437                &mut generate,
438            )
439        })
440        .collect::<Vec<_>>();
441    let module_docs = build_module_docs(
442        &file_docs,
443        &input.graph_edges,
444        input.graph_availability,
445        &mut generate,
446    );
447    let repo_doc = build_repo_doc(&file_docs, &module_docs, &mut generate);
448
449    let mut docs = Vec::new();
450    docs.push(("repo.md".to_string(), repo_doc));
451    for module in &module_docs {
452        docs.push((module_doc_path(&module.module), render_module_doc(module)));
453    }
454    for file in &file_docs {
455        docs.push((file_doc_path(&file.path), render_file_doc(file)));
456    }
457    docs
458}
459
460pub fn write_doc_set(out_dir: &Path, docs: &[(String, String)]) -> anyhow::Result<()> {
461    std::fs::create_dir_all(out_dir)?;
462    for (relative_path, content) in docs {
463        write_doc(out_dir, relative_path, content)?;
464    }
465    Ok(())
466}
467
468pub fn write_incremental_doc_set(
469    project_root: &Path,
470    out_dir: &Path,
471    docs: &[(String, String)],
472) -> anyhow::Result<Vec<String>> {
473    std::fs::create_dir_all(out_dir)?;
474    let previous = read_codewiki_meta(out_dir)?;
475    let mut next_docs = BTreeMap::new();
476    let mut generated_docs = Vec::new();
477
478    for (relative_path, content) in docs {
479        let doc_meta = CodewikiDocMeta {
480            source_hashes: source_hashes_for_doc(project_root, content)?,
481        };
482        let target = safe_doc_path(out_dir, relative_path)?;
483        let unchanged = target.exists()
484            && previous
485                .docs
486                .get(relative_path)
487                .is_some_and(|previous_meta| previous_meta == &doc_meta);
488
489        if !unchanged {
490            write_doc(out_dir, relative_path, content)?;
491            generated_docs.push(relative_path.clone());
492        }
493        next_docs.insert(relative_path.clone(), doc_meta);
494    }
495
496    for stale_path in previous
497        .docs
498        .keys()
499        .filter(|key| !next_docs.contains_key(*key))
500    {
501        let target = safe_doc_path(out_dir, stale_path)?;
502        reject_symlinked_doc_path(out_dir, &target)?;
503        match std::fs::remove_file(&target) {
504            Ok(()) => prune_empty_doc_dirs(out_dir, &target)?,
505            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
506            Err(err) => return Err(err.into()),
507        }
508    }
509
510    let meta = CodewikiMeta {
511        docs: next_docs,
512        generated_docs: generated_docs.clone(),
513    };
514    write_codewiki_meta(out_dir, &meta)?;
515    Ok(generated_docs)
516}
517
518fn write_doc(out_dir: &Path, relative_path: &str, content: &str) -> anyhow::Result<()> {
519    let target = safe_doc_path(out_dir, relative_path)?;
520    reject_symlinked_doc_path(out_dir, &target)?;
521    if let Some(parent) = target.parent() {
522        std::fs::create_dir_all(parent)?;
523    }
524    std::fs::write(target, content)?;
525    Ok(())
526}
527
528fn reject_symlinked_doc_path(out_dir: &Path, target: &Path) -> anyhow::Result<()> {
529    let relative = target.strip_prefix(out_dir)?;
530    let mut current = out_dir.to_path_buf();
531    for component in relative.components() {
532        current.push(component);
533        match std::fs::symlink_metadata(&current) {
534            Ok(metadata) if metadata.file_type().is_symlink() => {
535                anyhow::bail!(
536                    "refusing to follow symlinked codewiki path: {}",
537                    current.display()
538                );
539            }
540            Ok(_) => {}
541            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
542            Err(err) => return Err(err.into()),
543        }
544    }
545    Ok(())
546}
547
548fn prune_empty_doc_dirs(out_dir: &Path, target: &Path) -> anyhow::Result<()> {
549    let mut current = target.parent();
550    while let Some(dir) = current {
551        if dir == out_dir {
552            break;
553        }
554        match std::fs::remove_dir(dir) {
555            Ok(()) => current = dir.parent(),
556            Err(err)
557                if matches!(
558                    err.kind(),
559                    std::io::ErrorKind::NotFound | std::io::ErrorKind::DirectoryNotEmpty
560                ) =>
561            {
562                break;
563            }
564            Err(err) => return Err(err.into()),
565        }
566    }
567    Ok(())
568}
569
570fn read_codewiki_meta(out_dir: &Path) -> anyhow::Result<CodewikiMeta> {
571    let path = safe_doc_path(out_dir, CODEWIKI_META_PATH)?;
572    match std::fs::read_to_string(&path) {
573        Ok(raw) => Ok(serde_json::from_str(&raw)?),
574        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(CodewikiMeta::default()),
575        Err(err) => Err(err.into()),
576    }
577}
578
579fn write_codewiki_meta(out_dir: &Path, meta: &CodewikiMeta) -> anyhow::Result<()> {
580    let content = serde_json::to_string_pretty(meta)?;
581    write_doc(out_dir, CODEWIKI_META_PATH, &(content + "\n"))
582}
583
584fn source_hashes_for_doc(
585    project_root: &Path,
586    content: &str,
587) -> anyhow::Result<BTreeMap<String, String>> {
588    let mut hashes = BTreeMap::new();
589    for file in source_files_from_frontmatter(content) {
590        let hash = hasher::file_content_hash(&project_root.join(&file))
591            .map_err(|err| anyhow::anyhow!("failed to hash codewiki source file {file}: {err}"))?;
592        hashes.insert(file, hash);
593    }
594    Ok(hashes)
595}
596
597fn source_files_from_frontmatter(content: &str) -> BTreeSet<String> {
598    let mut files = BTreeSet::new();
599    let mut in_frontmatter = false;
600    for line in content.lines() {
601        if line == "---" {
602            if in_frontmatter {
603                break;
604            }
605            in_frontmatter = true;
606            continue;
607        }
608        if !in_frontmatter {
609            continue;
610        }
611        if let Some(file) = line
612            .strip_prefix("  - file: ")
613            .and_then(unquote_yaml_string)
614        {
615            files.insert(file);
616        }
617    }
618    files
619}
620
/// Decodes a double-quoted scalar such as `"path/to/file"`.
///
/// Surrounding whitespace is trimmed first. A backslash makes the following
/// character literal (so `\"` becomes `"` and `\\` becomes `\`).
///
/// Returns `None` when the value is not wrapped in double quotes or ends with
/// a dangling backslash.
fn unquote_yaml_string(value: &str) -> Option<String> {
    let body = value.trim().strip_prefix('"')?.strip_suffix('"')?;
    let mut decoded = String::new();
    let mut rest = body.chars();
    loop {
        match rest.next() {
            None => return Some(decoded),
            // An escape consumes the next character verbatim; a missing one is an error.
            Some('\\') => decoded.push(rest.next()?),
            Some(literal) => decoded.push(literal),
        }
    }
}
635
636fn fetch_codewiki_graph_edges(
637    ctx: &Context,
638    files: &[String],
639    symbols: &[Symbol],
640) -> anyhow::Result<CodewikiGraph> {
641    let symbol_components = symbols
642        .iter()
643        .filter(|symbol| is_core_file(&symbol.file_path))
644        .map(|symbol| (symbol.id.clone(), component_id(symbol)))
645        .collect::<HashMap<_, _>>();
646    if symbol_components.is_empty() {
647        return Ok(CodewikiGraph::available(Vec::new()));
648    }
649
650    let Some(config) = &ctx.falkordb else {
651        return Ok(CodewikiGraph::unavailable());
652    };
653
654    let mut client = match falkor::FalkorClient::from_config(config) {
655        Ok(client) => client,
656        Err(e) => {
657            if !ctx.quiet {
658                eprintln!("Warning: FalkorDB connection failed: {e}");
659            }
660            return Ok(CodewikiGraph::unavailable());
661        }
662    };
663
664    fn query_or_unavailable(
665        ctx: &Context,
666        client: &mut falkor::FalkorClient,
667        query: &str,
668        params: HashMap<String, String>,
669    ) -> Option<Vec<falkor::Row>> {
670        match client.query(query, Some(params)) {
671            Ok(rows) => Some(rows),
672            Err(e) => {
673                if !ctx.quiet {
674                    eprintln!("Warning: FalkorDB query failed: {e}");
675                }
676                None
677            }
678        }
679    }
680
681    let symbol_ids = symbol_components.keys().cloned().collect::<Vec<_>>();
682    let core_files = files
683        .iter()
684        .filter(|file| is_core_file(file))
685        .cloned()
686        .collect::<Vec<_>>();
687
688    let mut edges = Vec::new();
689    let (query, params) = codewiki_call_edges_query(&ctx.project_id, &symbol_ids);
690    let Some(rows) = query_or_unavailable(ctx, &mut client, &query, params) else {
691        return Ok(CodewikiGraph::unavailable());
692    };
693    for row in rows {
694        let Some(source) = row.get("source").and_then(|value| value.as_str()) else {
695            continue;
696        };
697        let Some(target) = row.get("target").and_then(|value| value.as_str()) else {
698            continue;
699        };
700        let Some(source_component_id) = symbol_components.get(source).cloned() else {
701            continue;
702        };
703        let Some(target_component_id) = symbol_components.get(target).cloned() else {
704            continue;
705        };
706        edges.push(CodewikiGraphEdge::call(
707            source_component_id,
708            target_component_id,
709        ));
710    }
711
712    if !core_files.is_empty() {
713        let file_symbols = symbols_by_file_component(symbols);
714        let (query, params) = codewiki_import_edges_query(&ctx.project_id, &core_files);
715        let Some(rows) = query_or_unavailable(ctx, &mut client, &query, params) else {
716            return Ok(CodewikiGraph::unavailable());
717        };
718        for row in rows {
719            let Some(source_file) = row.get("source").and_then(|value| value.as_str()) else {
720                continue;
721            };
722            let Some(target_module) = row.get("target").and_then(|value| value.as_str()) else {
723                continue;
724            };
725            let Some(source_component_id) = first_component_for_file(&file_symbols, source_file)
726            else {
727                continue;
728            };
729            for target_file in files_for_import_target(&core_files, target_module) {
730                let Some(target_component_id) =
731                    first_component_for_file(&file_symbols, target_file)
732                else {
733                    continue;
734                };
735                edges.push(CodewikiGraphEdge::import(
736                    source_component_id.clone(),
737                    target_component_id,
738                ));
739            }
740        }
741    }
742
743    Ok(CodewikiGraph::available(edges))
744}
745
746fn codewiki_call_edges_query(
747    project_id: &str,
748    symbol_ids: &[String],
749) -> (String, HashMap<String, String>) {
750    (
751        format!(
752            "MATCH (source:CodeSymbol {{project: $project}})-[:CALLS]->(target:CodeSymbol {{project: $project}}) \
753             WHERE source.id IN [{}] AND target.id IN [{}] \
754             RETURN source.id AS source, target.id AS target \
755             LIMIT 5000",
756            falkor::id_list_literal(symbol_ids),
757            falkor::id_list_literal(symbol_ids)
758        ),
759        HashMap::from([(
760            "project".to_string(),
761            falkor::cypher_string_literal(project_id),
762        )]),
763    )
764}
765
766fn codewiki_import_edges_query(
767    project_id: &str,
768    files: &[String],
769) -> (String, HashMap<String, String>) {
770    (
771        format!(
772            "MATCH (source:CodeFile {{project: $project}})-[:IMPORTS]->(target:CodeModule {{project: $project}}) \
773             WHERE source.path IN [{}] \
774             RETURN source.path AS source, target.name AS target \
775             LIMIT 5000",
776            falkor::id_list_literal(files)
777        ),
778        HashMap::from([(
779            "project".to_string(),
780            falkor::cypher_string_literal(project_id),
781        )]),
782    )
783}
784
785fn cluster_file_modules(
786    files: &[String],
787    symbols_by_file: &BTreeMap<String, Vec<Symbol>>,
788    graph_edges: &[CodewikiGraphEdge],
789) -> HashMap<String, String> {
790    let mut components_to_file = HashMap::new();
791    for (file, symbols) in symbols_by_file {
792        for symbol in symbols {
793            components_to_file.insert(component_id(symbol), file.clone());
794        }
795    }
796
797    let mut parents = files
798        .iter()
799        .map(|file| (file.clone(), file.clone()))
800        .collect::<HashMap<_, _>>();
801    for edge in graph_edges
802        .iter()
803        .filter(|edge| edge.kind == CodewikiGraphEdgeKind::Call)
804    {
805        let Some(source_file) = components_to_file.get(&edge.source_component_id) else {
806            continue;
807        };
808        let Some(target_file) = components_to_file.get(&edge.target_component_id) else {
809            continue;
810        };
811        union_files(&mut parents, source_file, target_file);
812    }
813
814    let mut grouped: BTreeMap<String, Vec<String>> = BTreeMap::new();
815    for file in files {
816        let root = find_file_root(&mut parents, file);
817        grouped.entry(root).or_default().push(file.clone());
818    }
819
820    let mut modules = HashMap::new();
821    for grouped_files in grouped.values() {
822        let module = if grouped_files.len() > 1 {
823            common_module_for_files(grouped_files)
824        } else {
825            module_for_file(&grouped_files[0])
826        };
827        for file in grouped_files {
828            modules.insert(file.clone(), module.clone());
829        }
830    }
831    modules
832}
833
834fn union_files(parents: &mut HashMap<String, String>, left: &str, right: &str) {
835    let left_root = find_file_root(parents, left);
836    let right_root = find_file_root(parents, right);
837    if left_root != right_root {
838        let (parent, child) = if left_root <= right_root {
839            (left_root, right_root)
840        } else {
841            (right_root, left_root)
842        };
843        parents.insert(child, parent);
844    }
845}
846
/// Returns the representative of the group containing `file`, re-pointing
/// every visited entry directly at that representative.
///
/// A file with no entry in `parents` is its own root and no entry is added
/// for it. If the parent links form a cycle, the smallest name on the walked
/// path becomes the root and the cycle is broken.
fn find_file_root(parents: &mut HashMap<String, String>, file: &str) -> String {
    let mut walked: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    let mut cursor = file.to_string();

    let root = loop {
        if seen.contains(&cursor) {
            // Revisiting a node means the links loop; pick a deterministic root.
            let smallest = walked
                .iter()
                .chain(std::iter::once(&cursor))
                .min()
                .cloned()
                .unwrap_or_else(|| cursor.clone());
            parents.insert(cursor, smallest.clone());
            break smallest;
        }
        seen.insert(cursor.clone());

        match parents.get(&cursor) {
            Some(next) if *next != cursor => {
                let next = next.clone();
                walked.push(std::mem::replace(&mut cursor, next));
            }
            // Missing entry or self-parent: this node is the root.
            _ => break cursor,
        }
    };

    for node in walked {
        parents.insert(node, root.clone());
    }
    root
}
881
882fn common_module_for_files(files: &[String]) -> String {
883    let mut common = module_for_file(&files[0])
884        .split('/')
885        .filter(|part| !part.is_empty())
886        .map(str::to_string)
887        .collect::<Vec<_>>();
888    for file in &files[1..] {
889        let parts = module_for_file(file)
890            .split('/')
891            .filter(|part| !part.is_empty())
892            .map(str::to_string)
893            .collect::<Vec<_>>();
894        let keep = common
895            .iter()
896            .zip(parts.iter())
897            .take_while(|(left, right)| left == right)
898            .count();
899        common.truncate(keep);
900    }
901    common.join("/")
902}
903
904fn symbols_by_file_component(symbols: &[Symbol]) -> BTreeMap<String, Vec<String>> {
905    let mut out: BTreeMap<String, Vec<String>> = BTreeMap::new();
906    for symbol in symbols {
907        if is_core_file(&symbol.file_path) {
908            out.entry(symbol.file_path.clone())
909                .or_default()
910                .push(component_id(symbol));
911        }
912    }
913    out
914}
915
/// Returns the first component ID recorded for `file`, or `None` when the
/// file is unknown or has no components.
fn first_component_for_file(
    symbols_by_file: &BTreeMap<String, Vec<String>>,
    file: &str,
) -> Option<String> {
    let components = symbols_by_file.get(file)?;
    components.first().cloned()
}
925
/// Returns the files that live inside the directory named by an imported
/// module.
///
/// `target_module` is converted to a path by replacing `::` and `.` with `/`.
/// A file matches when its path starts with `<target>/` or contains
/// `/<target>/`. Matches keep the order of `files`.
fn files_for_import_target<'a>(files: &'a [String], target_module: &str) -> Vec<&'a str> {
    let target = target_module.replace("::", "/").replace('.', "/");
    // Build both needles once instead of re-formatting them for every file.
    let leading = format!("{target}/");
    let nested = format!("/{target}/");
    files
        .iter()
        .map(String::as_str)
        .filter(|file| file.starts_with(leading.as_str()) || file.contains(nested.as_str()))
        .collect()
}
936
937fn build_file_doc(
938    file: &str,
939    module: String,
940    symbols: Vec<Symbol>,
941    generate: &mut Option<&mut TextGenerator<'_>>,
942) -> FileDoc {
943    let symbol_docs = symbols
944        .into_iter()
945        .map(|symbol| {
946            let fallback = structural_symbol_purpose(&symbol);
947            let generated = maybe_generate(
948                generate,
949                &prompts::symbol_prompt(&symbol),
950                prompts::SYMBOL_SYSTEM,
951            )
952            .unwrap_or(fallback);
953            let component_id = component_id(&symbol);
954            let component_label = component_label(&symbol);
955            let source_span = SourceSpan::from_symbol(&symbol);
956            let purpose = ground_text(
957                &generated,
958                std::slice::from_ref(&source_span),
959                &source_span.citation(),
960            );
961            SymbolDoc {
962                symbol,
963                purpose,
964                component_id,
965                component_label,
966                source_span,
967            }
968        })
969        .collect::<Vec<_>>();
970    let source_spans = symbol_docs
971        .iter()
972        .map(|symbol| symbol.source_span.clone())
973        .collect::<Vec<_>>();
974    let prompt_symbols = symbol_docs
975        .iter()
976        .map(|symbol| prompts::SymbolSummary {
977            name: symbol.symbol.qualified_name.clone(),
978            kind: symbol.symbol.kind.clone(),
979            component_id: symbol.component_id.clone(),
980            component_label: symbol.component_label.clone(),
981            line_start: symbol.symbol.line_start,
982            line_end: symbol.symbol.line_end,
983            purpose: symbol.purpose.clone(),
984        })
985        .collect::<Vec<_>>();
986    let component_ids = symbol_docs
987        .iter()
988        .map(|symbol| symbol.component_id.clone())
989        .collect::<Vec<_>>();
990    let fallback = structural_file_summary(file, &symbol_docs);
991    let generated = maybe_generate(
992        generate,
993        &prompts::file_prompt(file, &prompt_symbols),
994        prompts::FILE_SYSTEM,
995    )
996    .unwrap_or(fallback);
997    let summary = ground_text(&generated, &source_spans, &citation_list(&source_spans));
998
999    FileDoc {
1000        path: file.to_string(),
1001        module,
1002        summary,
1003        source_spans,
1004        symbols: symbol_docs,
1005        component_ids,
1006    }
1007}
1008
1009fn build_module_docs(
1010    files: &[FileDoc],
1011    graph_edges: &[CodewikiGraphEdge],
1012    graph_availability: CodewikiGraphAvailability,
1013    generate: &mut Option<&mut TextGenerator<'_>>,
1014) -> Vec<ModuleDoc> {
1015    let mut module_names = BTreeSet::new();
1016    for file in files {
1017        for module in module_ancestors(&file.module) {
1018            module_names.insert(module);
1019        }
1020    }
1021
1022    let mut module_summaries: BTreeMap<String, String> = BTreeMap::new();
1023    let mut module_sources: BTreeMap<String, Vec<SourceSpan>> = BTreeMap::new();
1024    let mut modules = module_names.into_iter().collect::<Vec<_>>();
1025    modules.sort_by_key(|module| std::cmp::Reverse(module_depth(module)));
1026
1027    let mut docs = Vec::new();
1028    for module in modules {
1029        let direct_files = files
1030            .iter()
1031            .filter(|file| file.module == module)
1032            .map(|file| FileLink {
1033                path: file.path.clone(),
1034                summary: file.summary.clone(),
1035                source_spans: file.source_spans.clone(),
1036            })
1037            .collect::<Vec<_>>();
1038        let child_modules = direct_child_modules(&module, module_summaries.keys())
1039            .into_iter()
1040            .map(|child| ModuleLink {
1041                summary: module_summaries.get(&child).cloned().unwrap_or_default(),
1042                source_spans: module_sources.get(&child).cloned().unwrap_or_default(),
1043                module: child,
1044            })
1045            .collect::<Vec<_>>();
1046        let file_summaries = direct_files
1047            .iter()
1048            .map(|file| prompts::ChildSummary {
1049                name: file.path.clone(),
1050                summary: file.summary.clone(),
1051            })
1052            .collect::<Vec<_>>();
1053        let child_summaries = child_modules
1054            .iter()
1055            .map(|module| prompts::ChildSummary {
1056                name: module.module.clone(),
1057                summary: module.summary.clone(),
1058            })
1059            .collect::<Vec<_>>();
1060        let component_ids = files
1061            .iter()
1062            .filter(|file| file.module == module || module_is_ancestor(&module, &file.module))
1063            .flat_map(|file| {
1064                file.symbols
1065                    .iter()
1066                    .map(|symbol| format!("{} ({})", symbol.component_label, symbol.component_id))
1067            })
1068            .collect::<Vec<_>>();
1069        let dependency_diagram = render_module_dependency_mermaid(&module, files, graph_edges);
1070        let call_diagram = render_module_call_mermaid(&module, files, graph_edges);
1071        let fallback = structural_module_summary(&module, &direct_files, &child_modules);
1072        let source_spans = collect_link_spans(&direct_files, &child_modules);
1073        let generated = maybe_generate(
1074            generate,
1075            &prompts::module_prompt(&module, &file_summaries, &child_summaries, &component_ids),
1076            prompts::MODULE_SYSTEM,
1077        )
1078        .unwrap_or(fallback);
1079        let summary = ground_text(&generated, &source_spans, &citation_list(&source_spans));
1080
1081        module_summaries.insert(module.clone(), summary.clone());
1082        module_sources.insert(module.clone(), source_spans.clone());
1083        docs.push(ModuleDoc {
1084            module,
1085            summary,
1086            source_spans,
1087            direct_files,
1088            child_modules,
1089            component_ids,
1090            dependency_diagram,
1091            call_diagram,
1092            graph_availability,
1093        });
1094    }
1095
1096    docs.sort_by(|a, b| a.module.cmp(&b.module));
1097    docs
1098}
1099
1100fn render_module_dependency_mermaid(
1101    module: &str,
1102    files: &[FileDoc],
1103    graph_edges: &[CodewikiGraphEdge],
1104) -> Option<String> {
1105    let mut component_to_module = HashMap::new();
1106    for file in files {
1107        for component_id in &file.component_ids {
1108            component_to_module.insert(component_id.as_str(), file.module.as_str());
1109        }
1110    }
1111
1112    let all_edges = graph_edges
1113        .iter()
1114        .filter(|edge| edge.kind == CodewikiGraphEdgeKind::Import)
1115        .filter_map(|edge| {
1116            let source = component_to_module.get(edge.source_component_id.as_str())?;
1117            let target = component_to_module.get(edge.target_component_id.as_str())?;
1118            if source == target {
1119                return None;
1120            }
1121            Some(((*source).to_string(), (*target).to_string()))
1122        })
1123        .collect::<BTreeSet<_>>();
1124    if all_edges.is_empty() {
1125        return None;
1126    }
1127
1128    let bounded_edges = bounded_module_dependency_edges(module, &all_edges, MAX_MERMAID_HOPS);
1129    if bounded_edges.is_empty() {
1130        return None;
1131    }
1132
1133    let mut diagram = "```mermaid\ngraph LR\n".to_string();
1134    for (source, target) in bounded_edges {
1135        let _ = writeln!(
1136            diagram,
1137            "    {}[\"{}\"] --> {}[\"{}\"]",
1138            mermaid_node_id(&source),
1139            mermaid_label(&source),
1140            mermaid_node_id(&target),
1141            mermaid_label(&target)
1142        );
1143    }
1144    diagram.push_str("```\n");
1145    Some(diagram)
1146}
1147
1148fn render_module_call_mermaid(
1149    module: &str,
1150    files: &[FileDoc],
1151    graph_edges: &[CodewikiGraphEdge],
1152) -> Option<String> {
1153    let component_labels = files
1154        .iter()
1155        .flat_map(|file| {
1156            file.symbols.iter().map(|symbol| {
1157                (
1158                    symbol.component_id.as_str(),
1159                    symbol.component_label.as_str(),
1160                )
1161            })
1162        })
1163        .collect::<HashMap<_, _>>();
1164    let component_to_module = files
1165        .iter()
1166        .flat_map(|file| {
1167            file.component_ids
1168                .iter()
1169                .map(|component_id| (component_id.as_str(), file.module.as_str()))
1170        })
1171        .collect::<HashMap<_, _>>();
1172    let all_edges = graph_edges
1173        .iter()
1174        .filter(|edge| edge.kind == CodewikiGraphEdgeKind::Call)
1175        .filter_map(|edge| {
1176            let source_module = component_to_module.get(edge.source_component_id.as_str())?;
1177            let target_module = component_to_module.get(edge.target_component_id.as_str())?;
1178            if *source_module != module && *target_module != module {
1179                return None;
1180            }
1181            Some((
1182                edge.source_component_id.clone(),
1183                edge.target_component_id.clone(),
1184            ))
1185        })
1186        .collect::<BTreeSet<_>>();
1187    if all_edges.is_empty() {
1188        return None;
1189    }
1190
1191    let seed_components = files
1192        .iter()
1193        .filter(|file| file.module == module || module_is_ancestor(module, &file.module))
1194        .flat_map(|file| file.component_ids.iter().cloned())
1195        .collect::<BTreeSet<_>>();
1196    let bounded_edges = bounded_component_edges(
1197        &seed_components,
1198        &all_edges,
1199        MAX_MERMAID_HOPS,
1200        MAX_MERMAID_EDGES,
1201    );
1202    if bounded_edges.is_empty() {
1203        return None;
1204    }
1205
1206    let mut participants = BTreeSet::new();
1207    for (source, target) in &bounded_edges {
1208        participants.insert(source.clone());
1209        participants.insert(target.clone());
1210    }
1211
1212    let mut diagram = "```mermaid\nsequenceDiagram\n".to_string();
1213    for component in participants {
1214        let _ = writeln!(
1215            diagram,
1216            "    participant {} as {}",
1217            mermaid_node_id(&component),
1218            mermaid_label(
1219                component_labels
1220                    .get(component.as_str())
1221                    .copied()
1222                    .unwrap_or(&component)
1223            )
1224        );
1225    }
1226    for (source, target) in bounded_edges {
1227        let _ = writeln!(
1228            diagram,
1229            "    {}->>{}: calls",
1230            mermaid_node_id(&source),
1231            mermaid_node_id(&target)
1232        );
1233    }
1234    diagram.push_str("```\n");
1235    Some(diagram)
1236}
1237
/// Returns the edges whose endpoints both lie within `max_hops` of `module`.
///
/// Edges are treated as undirected for the purpose of measuring distance.
/// `module` itself is at distance zero, so `max_hops == 0` keeps only
/// self-loops on `module`, if any.
fn bounded_module_dependency_edges(
    module: &str,
    edges: &BTreeSet<(String, String)>,
    max_hops: usize,
) -> BTreeSet<(String, String)> {
    // Index the edge set once so the search does not rescan every edge for
    // each visited node.
    let mut adjacency: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
    for (source, target) in edges {
        adjacency
            .entry(source.as_str())
            .or_default()
            .push(target.as_str());
        adjacency
            .entry(target.as_str())
            .or_default()
            .push(source.as_str());
    }

    let mut reached: BTreeSet<&str> = BTreeSet::from([module]);
    let mut queue: VecDeque<(&str, usize)> = VecDeque::from([(module, 0usize)]);
    while let Some((current, distance)) = queue.pop_front() {
        // Nodes at the hop limit are kept but not expanded further.
        if distance >= max_hops {
            continue;
        }
        if let Some(neighbors) = adjacency.get(current) {
            for &next in neighbors {
                if reached.insert(next) {
                    queue.push_back((next, distance + 1));
                }
            }
        }
    }

    edges
        .iter()
        .filter(|(source, target)| {
            reached.contains(source.as_str()) && reached.contains(target.as_str())
        })
        .cloned()
        .collect()
}
1268
/// Returns up to `max_edges` edges whose endpoints both lie within `max_hops`
/// of any seed component.
///
/// Edges are treated as undirected for the purpose of measuring distance, and
/// every seed is at distance zero. When more than `max_edges` edges qualify,
/// the first ones in the set's sort order are kept.
fn bounded_component_edges(
    seed_components: &BTreeSet<String>,
    edges: &BTreeSet<(String, String)>,
    max_hops: usize,
    max_edges: usize,
) -> BTreeSet<(String, String)> {
    // Index the edge set once so the search does not rescan every edge for
    // each visited node.
    let mut adjacency: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
    for (source, target) in edges {
        adjacency
            .entry(source.as_str())
            .or_default()
            .push(target.as_str());
        adjacency
            .entry(target.as_str())
            .or_default()
            .push(source.as_str());
    }

    let mut reached: BTreeSet<&str> = seed_components.iter().map(String::as_str).collect();
    let mut queue: VecDeque<(&str, usize)> = reached
        .iter()
        .map(|&component| (component, 0usize))
        .collect();
    while let Some((current, distance)) = queue.pop_front() {
        // Nodes at the hop limit are kept but not expanded further.
        if distance >= max_hops {
            continue;
        }
        if let Some(neighbors) = adjacency.get(current) {
            for &next in neighbors {
                if reached.insert(next) {
                    queue.push_back((next, distance + 1));
                }
            }
        }
    }

    edges
        .iter()
        .filter(|(source, target)| {
            reached.contains(source.as_str()) && reached.contains(target.as_str())
        })
        .take(max_edges)
        .cloned()
        .collect()
}
1307
/// Lists the opposite endpoint of an edge for each endpoint equal to `module`.
///
/// The target is reported before the source, so a self-loop on `module`
/// yields the same name twice. An edge that does not touch `module` yields
/// nothing.
fn dependency_neighbors<'a>(module: &str, source: &'a str, target: &'a str) -> Vec<&'a str> {
    let mut found = Vec::with_capacity(2);
    for (endpoint, opposite) in [(source, target), (target, source)] {
        if endpoint == module {
            found.push(opposite);
        }
    }
    found
}
1318
/// Derives a Mermaid node identifier from a module or component name.
///
/// The result is `m_` followed by the input with every character that is not
/// ASCII alphanumeric replaced by `_`.
fn mermaid_node_id(module: &str) -> String {
    let mut id = String::from("m_");
    id.extend(
        module
            .chars()
            .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' }),
    );
    id
}
1330
/// Produces display text for a Mermaid label; the empty name is shown as `repo`.
///
/// Mermaid does not interpret backslash escapes: a `\"` would still close a
/// quoted label and a doubled backslash would be rendered as two characters.
/// Characters that need protection are therefore written as Mermaid entity
/// codes: `"` becomes `#quot;`, and `#` becomes `#35;` so literal text cannot
/// be mistaken for an entity code.
fn mermaid_label(module: &str) -> String {
    if module.is_empty() {
        return "repo".to_string();
    }
    let mut label = String::with_capacity(module.len());
    for ch in module.chars() {
        match ch {
            '#' => label.push_str("#35;"),
            '"' => label.push_str("#quot;"),
            other => label.push(other),
        }
    }
    label
}
1338
1339fn build_repo_doc(
1340    files: &[FileDoc],
1341    modules: &[ModuleDoc],
1342    generate: &mut Option<&mut TextGenerator<'_>>,
1343) -> String {
1344    let top_modules = modules
1345        .iter()
1346        .filter(|module| parent_module(&module.module).is_none())
1347        .map(|module| ModuleLink {
1348            module: module.module.clone(),
1349            summary: module.summary.clone(),
1350            source_spans: module.source_spans.clone(),
1351        })
1352        .collect::<Vec<_>>();
1353    let root_files = files
1354        .iter()
1355        .filter(|file| file.module.is_empty())
1356        .map(|file| FileLink {
1357            path: file.path.clone(),
1358            summary: file.summary.clone(),
1359            source_spans: file.source_spans.clone(),
1360        })
1361        .collect::<Vec<_>>();
1362    let module_summaries = top_modules
1363        .iter()
1364        .map(|module| prompts::ChildSummary {
1365            name: module.module.clone(),
1366            summary: module.summary.clone(),
1367        })
1368        .collect::<Vec<_>>();
1369    let file_summaries = root_files
1370        .iter()
1371        .map(|file| prompts::ChildSummary {
1372            name: file.path.clone(),
1373            summary: file.summary.clone(),
1374        })
1375        .collect::<Vec<_>>();
1376    let fallback = structural_repo_summary(files.len(), modules.len());
1377    let source_spans = collect_link_spans(&root_files, &top_modules);
1378    let generated = maybe_generate(
1379        generate,
1380        &prompts::repo_prompt(&module_summaries, &file_summaries),
1381        prompts::REPO_SYSTEM,
1382    )
1383    .unwrap_or(fallback);
1384    let summary = ground_text(&generated, &source_spans, &citation_list(&source_spans));
1385
1386    render_repo_doc(&summary, &top_modules, &root_files, &source_spans)
1387}
1388
1389fn render_repo_doc(
1390    summary: &str,
1391    modules: &[ModuleLink],
1392    files: &[FileLink],
1393    source_spans: &[SourceSpan],
1394) -> String {
1395    let mut doc = frontmatter("Repository Overview", "code_repo", source_spans);
1396    doc.push_str("# Repository Overview\n\n");
1397    write_section(&mut doc, "Overview", summary);
1398    if !modules.is_empty() {
1399        doc.push_str("## Modules\n\n");
1400        for module in modules {
1401            let _ = writeln!(
1402                doc,
1403                "- {} - {}",
1404                module_wikilink(&module.module),
1405                module.summary
1406            );
1407        }
1408        doc.push('\n');
1409    }
1410    if !files.is_empty() {
1411        doc.push_str("## Files\n\n");
1412        for file in files {
1413            let _ = writeln!(doc, "- {} - {}", file_wikilink(&file.path), file.summary);
1414        }
1415        doc.push('\n');
1416    }
1417    doc
1418}
1419
1420fn render_module_doc(module: &ModuleDoc) -> String {
1421    let mut doc = frontmatter(&module.module, "code_module", &module.source_spans);
1422    let _ = writeln!(doc, "# {}\n", module.module);
1423    match parent_module(&module.module) {
1424        Some(parent) => {
1425            let _ = writeln!(doc, "Parent: {}\n", module_wikilink(parent));
1426        }
1427        None => doc.push_str("Parent: [[repo|Repository Overview]]\n\n"),
1428    }
1429    write_section(&mut doc, "Overview", &module.summary);
1430    match module.graph_availability {
1431        CodewikiGraphAvailability::Unavailable => {
1432            doc.push_str("## Dependency Diagram\n\n`degraded: graph-unavailable`\n\n");
1433        }
1434        CodewikiGraphAvailability::Available => {
1435            if let Some(diagram) = &module.dependency_diagram {
1436                doc.push_str("## Dependency Diagram\n\n");
1437                doc.push_str(diagram);
1438                doc.push('\n');
1439            }
1440            if let Some(diagram) = &module.call_diagram {
1441                doc.push_str("## Call Diagram\n\n");
1442                doc.push_str(diagram);
1443                doc.push('\n');
1444            }
1445        }
1446    }
1447    if !module.child_modules.is_empty() {
1448        doc.push_str("## Child Modules\n\n");
1449        for child in &module.child_modules {
1450            let _ = writeln!(
1451                doc,
1452                "- {} - {}",
1453                module_wikilink(&child.module),
1454                child.summary
1455            );
1456        }
1457        doc.push('\n');
1458    }
1459    if !module.direct_files.is_empty() {
1460        doc.push_str("## Files\n\n");
1461        for file in &module.direct_files {
1462            let _ = writeln!(doc, "- {} - {}", file_wikilink(&file.path), file.summary);
1463        }
1464        doc.push('\n');
1465    }
1466    if !module.component_ids.is_empty() {
1467        doc.push_str("## Components\n\n");
1468        for component_id in &module.component_ids {
1469            let _ = writeln!(doc, "- {}", inline_code(component_id));
1470        }
1471        doc.push('\n');
1472    }
1473    doc
1474}
1475
1476fn render_file_doc(file: &FileDoc) -> String {
1477    let mut doc = frontmatter(&file.path, "code_file", &file.source_spans);
1478    let _ = writeln!(doc, "# {}\n", file.path);
1479    if file.module.is_empty() {
1480        doc.push_str("Module: [[repo|Repository Overview]]\n\n");
1481    } else {
1482        let _ = writeln!(doc, "Module: {}\n", module_wikilink(&file.module));
1483    }
1484    write_section(&mut doc, "Purpose", &file.summary);
1485    doc.push_str("## API Symbols\n\n");
1486    if file.symbols.is_empty() {
1487        doc.push_str("No indexed symbols.\n");
1488        return doc;
1489    }
1490    for symbol in &file.symbols {
1491        let _ = writeln!(
1492            doc,
1493            "- {} ({}) component {} ({}) lines {}-{} {}",
1494            inline_code(&symbol.symbol.qualified_name),
1495            symbol.symbol.kind,
1496            inline_code(&symbol.component_label),
1497            inline_code(&symbol.component_id),
1498            symbol.symbol.line_start,
1499            symbol.symbol.line_end,
1500            symbol.source_span.citation()
1501        );
1502        if let Some(signature) = symbol
1503            .symbol
1504            .signature
1505            .as_deref()
1506            .filter(|value| !value.is_empty())
1507        {
1508            let _ = writeln!(doc, "  - Signature: {}", inline_code(signature));
1509        }
1510        let _ = writeln!(doc, "  - Purpose: {}", symbol.purpose);
1511    }
1512    doc.push('\n');
1513    doc
1514}
1515
1516fn resolve_text_generator(
1517    ctx: &Context,
1518    ai: Option<AiRouting>,
1519) -> Option<Box<TextGenerator<'static>>> {
1520    let ai_context = resolve_ai_context(ctx, ai).ok()?;
1521    let route = effective_route(&ai_context, AiCapability::TextGenerate);
1522    if matches!(route, AiRouting::Off | AiRouting::Auto) {
1523        return None;
1524    }
1525
1526    let mut warned = false;
1527    let quiet = ctx.quiet;
1528    Some(Box::new(move |prompt, system| {
1529        let result = match route {
1530            AiRouting::Daemon => generate_via_daemon(&ai_context, prompt, Some(system)),
1531            AiRouting::Direct => generate_text(&ai_context, prompt, Some(system)),
1532            AiRouting::Off | AiRouting::Auto => return None,
1533        };
1534        match result {
1535            Ok(result) => clean_generated(result.text),
1536            Err(error) => {
1537                if !quiet && !warned {
1538                    eprintln!("text generation unavailable; using AST-only codewiki docs: {error}");
1539                    warned = true;
1540                }
1541                None
1542            }
1543        }
1544    }))
1545}
1546
1547fn resolve_ai_context(ctx: &Context, ai: Option<AiRouting>) -> anyhow::Result<AiContext> {
1548    let mut conn = db::connect_readonly(&ctx.database_url)?;
1549    let standalone = config::read_standalone_config_optional();
1550    let primary = PostgresAiConfigSource::new(&mut conn, secrets::resolve_config_value);
1551    let mut source = AiConfigSource::with_primary(primary, standalone);
1552    Ok(AiContext::resolve_with_options(
1553        Some(ctx.project_id.clone()),
1554        &mut source,
1555        AiContextOptions {
1556            no_ai: false,
1557            forced_routing: ai,
1558        },
1559    ))
1560}
1561
1562fn maybe_generate(
1563    generate: &mut Option<&mut TextGenerator<'_>>,
1564    prompt: &str,
1565    system: &str,
1566) -> Option<String> {
1567    generate
1568        .as_deref_mut()
1569        .and_then(|generate| generate(prompt, system))
1570        .and_then(clean_generated)
1571}
1572
/// Trims surrounding whitespace from generated text.
///
/// Returns `None` when nothing remains after trimming.
fn clean_generated(text: String) -> Option<String> {
    match text.trim() {
        "" => None,
        trimmed => Some(trimmed.to_owned()),
    }
}
1577
1578fn structural_symbol_purpose(symbol: &Symbol) -> String {
1579    if let Some(summary) = symbol.summary.as_deref().filter(|value| !value.is_empty()) {
1580        return summary.to_string();
1581    }
1582    if let Some(docstring) = symbol
1583        .docstring
1584        .as_deref()
1585        .filter(|value| !value.is_empty())
1586    {
1587        return docstring.to_string();
1588    }
1589    format!(
1590        "Indexed {} `{}` in `{}`.",
1591        symbol.kind, symbol.qualified_name, symbol.file_path
1592    )
1593}
1594
1595fn structural_file_summary(file: &str, symbols: &[SymbolDoc]) -> String {
1596    if symbols.is_empty() {
1597        return format!("`{file}` has no indexed API symbols.");
1598    }
1599    format!(
1600        "`{file}` exposes {} indexed API symbol{}.",
1601        symbols.len(),
1602        plural(symbols.len())
1603    )
1604}
1605
1606fn structural_module_summary(
1607    module: &str,
1608    files: &[FileLink],
1609    child_modules: &[ModuleLink],
1610) -> String {
1611    let file_count = files.len();
1612    let child_count = child_modules.len();
1613    format!(
1614        "`{module}` contains {file_count} direct file{} and {child_count} child module{}.",
1615        plural(file_count),
1616        plural(child_count)
1617    )
1618}
1619
1620fn structural_repo_summary(file_count: usize, module_count: usize) -> String {
1621    format!(
1622        "Repository code documentation covers {file_count} file{} across {module_count} module{}.",
1623        plural(file_count),
1624        plural(module_count)
1625    )
1626}
1627
/// Appends a level-two Markdown section to `doc`.
///
/// The body is trimmed, and both the heading and the body are followed by a
/// blank line.
fn write_section(doc: &mut String, heading: &str, body: &str) {
    doc.push_str("## ");
    doc.push_str(heading);
    doc.push_str("\n\n");
    doc.push_str(body.trim());
    doc.push_str("\n\n");
}
1631
1632impl SourceSpan {
1633    fn from_symbol(symbol: &Symbol) -> Self {
1634        Self {
1635            file: symbol.file_path.clone(),
1636            line_start: symbol.line_start,
1637            line_end: symbol.line_end,
1638        }
1639    }
1640
1641    fn citation(&self) -> String {
1642        if self.line_start == self.line_end {
1643            format!("[{}:{}]", self.file, self.line_start)
1644        } else {
1645            format!("[{}:{}-{}]", self.file, self.line_start, self.line_end)
1646        }
1647    }
1648
1649    fn contains(&self, file: &str, line_start: usize, line_end: usize) -> bool {
1650        self.file == file && self.line_start <= line_start && line_end <= self.line_end
1651    }
1652}
1653
1654fn collect_link_spans(files: &[FileLink], modules: &[ModuleLink]) -> Vec<SourceSpan> {
1655    let mut spans = BTreeSet::new();
1656    for file in files {
1657        spans.extend(file.source_spans.iter().cloned());
1658    }
1659    for module in modules {
1660        spans.extend(module.source_spans.iter().cloned());
1661    }
1662    spans.into_iter().collect()
1663}
1664
1665fn citation_list(spans: &[SourceSpan]) -> String {
1666    spans
1667        .iter()
1668        .cloned()
1669        .collect::<BTreeSet<_>>()
1670        .into_iter()
1671        .map(|span| span.citation())
1672        .collect::<Vec<_>>()
1673        .join(" ")
1674}
1675
1676fn ground_text(text: &str, valid_spans: &[SourceSpan], fallback_citation: &str) -> String {
1677    let cleaned = strip_invalid_citations(text, valid_spans);
1678    if fallback_citation.is_empty() || contains_valid_citation(&cleaned, valid_spans) {
1679        cleaned
1680    } else {
1681        format!("{cleaned} {fallback_citation}")
1682    }
1683}
1684
1685fn strip_invalid_citations(text: &str, valid_spans: &[SourceSpan]) -> String {
1686    let mut out = String::new();
1687    let mut rest = text;
1688    while let Some(open) = rest.find('[') {
1689        let (before, after_open) = rest.split_at(open);
1690        out.push_str(before);
1691        let after_open = &after_open[1..];
1692        let Some(close) = after_open.find(']') else {
1693            out.push('[');
1694            out.push_str(after_open);
1695            return out;
1696        };
1697        let candidate = &after_open[..close];
1698        if citation_parts(candidate).is_none_or(|(file, start, end)| {
1699            valid_spans
1700                .iter()
1701                .any(|span| span.contains(file, start, end))
1702        }) {
1703            out.push('[');
1704            out.push_str(candidate);
1705            out.push(']');
1706        }
1707        rest = &after_open[close + 1..];
1708    }
1709    out.push_str(rest);
1710    out
1711}
1712
1713fn contains_valid_citation(text: &str, valid_spans: &[SourceSpan]) -> bool {
1714    let mut rest = text;
1715    while let Some(open) = rest.find('[') {
1716        let after_open = &rest[open + 1..];
1717        let Some(close) = after_open.find(']') else {
1718            return false;
1719        };
1720        if let Some((file, start, end)) = citation_parts(&after_open[..close])
1721            && valid_spans
1722                .iter()
1723                .any(|span| span.contains(file, start, end))
1724        {
1725            return true;
1726        }
1727        rest = &after_open[close + 1..];
1728    }
1729    false
1730}
1731
/// Parses the inside of a bracketed citation into file, start and end line.
///
/// Accepts `file:line` and `file:start-end`, splitting on the last `:`.
/// Returns `None` when the file part is empty or contains whitespace, when a
/// line number does not parse, when the start line is zero, or when the start
/// exceeds the end.
fn citation_parts(value: &str) -> Option<(&str, usize, usize)> {
    let separator = value.rfind(':')?;
    let file = &value[..separator];
    let range = &value[separator + 1..];
    if file.is_empty() || file.contains(char::is_whitespace) {
        return None;
    }
    // A single line number stands for a range that starts and ends on it.
    let (start_text, end_text) = range.split_once('-').unwrap_or((range, range));
    let line_start: usize = start_text.parse().ok()?;
    let line_end: usize = end_text.parse().ok()?;
    if line_start == 0 || line_start > line_end {
        return None;
    }
    Some((file, line_start, line_end))
}
1746
1747fn frontmatter(title: &str, kind: &str, source_spans: &[SourceSpan]) -> String {
1748    let mut out = format!("---\ntitle: \"{}\"\ntype: {kind}\n", yaml_quote(title));
1749    let mut files: BTreeMap<&str, BTreeSet<(usize, usize)>> = BTreeMap::new();
1750    for span in source_spans {
1751        files
1752            .entry(&span.file)
1753            .or_default()
1754            .insert((span.line_start, span.line_end));
1755    }
1756    if files.is_empty() {
1757        out.push_str("source_files: []\n");
1758        out.push_str("---\n\n");
1759        return out;
1760    }
1761    out.push_str("source_files:\n");
1762    for (file, ranges) in files {
1763        let _ = writeln!(out, "  - file: \"{}\"", yaml_quote(file));
1764        out.push_str("    ranges:\n");
1765        for (line_start, line_end) in ranges {
1766            if line_start == line_end {
1767                let _ = writeln!(out, "      - \"{line_start}\"");
1768            } else {
1769                let _ = writeln!(out, "      - \"{line_start}-{line_end}\"");
1770            }
1771        }
1772    }
1773    out.push_str("---\n\n");
1774    out
1775}
1776
/// Escapes a value for use inside a double-quoted YAML scalar.
///
/// Backslashes and double quotes are backslash-escaped. Line breaks, tabs and
/// other control characters are written as YAML escape sequences (`\n`, `\r`,
/// `\t`, `\xNN`). Left raw, a line break would be folded by the YAML parser
/// and most other control characters are not allowed in the scalar.
fn yaml_quote(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            // All Unicode control characters are below U+0100, so two hex
            // digits are always enough.
            control if control.is_control() => {
                escaped.push_str(&format!("\\x{:02X}", control as u32));
            }
            other => escaped.push(other),
        }
    }
    escaped
}
1780
1781fn inline_code(value: &str) -> String {
1782    let value = value.replace('\n', " ");
1783    let delimiter = "`".repeat(max_backtick_run(&value).saturating_add(1));
1784    if value.starts_with('`') || value.ends_with('`') {
1785        format!("{delimiter} {value} {delimiter}")
1786    } else {
1787        format!("{delimiter}{value}{delimiter}")
1788    }
1789}
1790
/// Returns the length of the longest run of consecutive backticks in `value`.
fn max_backtick_run(value: &str) -> usize {
    // Splitting on every other character leaves only the backtick runs; a
    // backtick is one byte, so the byte length of a run is its character count.
    value
        .split(|ch: char| ch != '`')
        .map(str::len)
        .max()
        .unwrap_or(0)
}
1804
/// Returns the plural suffix for a count: empty for exactly one, `s` otherwise.
fn plural(count: usize) -> &'static str {
    match count {
        1 => "",
        _ => "s",
    }
}
1808
1809fn component_id(symbol: &Symbol) -> String {
1810    symbol.id.clone()
1811}
1812
1813fn component_label(symbol: &Symbol) -> String {
1814    let name = if symbol.qualified_name.is_empty() {
1815        &symbol.name
1816    } else {
1817        &symbol.qualified_name
1818    };
1819    format!("{name} [{}]", symbol.kind)
1820}
1821
/// Reports whether a path looks like first-party, hand-written source.
///
/// A path is rejected when its lowercased form carries a generated, test or
/// spec marker in the file name, or when any path component (compared
/// case-insensitively) is a test, fixture, vendor, generated or build-output
/// directory name.
fn is_core_file(file: &str) -> bool {
    const EXCLUDED_INFIXES: &[&str] = &[".generated.", ".test.", ".spec."];
    const EXCLUDED_SUFFIXES: &[&str] = &[".generated.rs", ".gen.rs", "_test.rs", "_tests.rs"];
    const EXCLUDED_COMPONENTS: &[&str] = &[
        "test",
        "tests",
        "__tests__",
        "spec",
        "specs",
        "fixture",
        "fixtures",
        "vendor",
        "vendored",
        "third_party",
        "generated",
        "gen",
        "dist",
        "build",
        "target",
        "node_modules",
    ];

    let lower = file.to_ascii_lowercase();
    let has_marker = EXCLUDED_INFIXES
        .iter()
        .any(|marker| lower.contains(*marker))
        || EXCLUDED_SUFFIXES
            .iter()
            .any(|suffix| lower.ends_with(*suffix));
    if has_marker {
        return false;
    }

    Path::new(file).components().all(|component| {
        let part = component.as_os_str().to_string_lossy().to_ascii_lowercase();
        !EXCLUDED_COMPONENTS.contains(&part.as_str())
    })
}
1857
/// Reports whether `file` falls inside any of `scopes`.
///
/// An empty scope list, or any empty scope string, matches every file.
/// Otherwise a scope matches when it equals `file` exactly or when `file`
/// starts with the scope (trailing slashes ignored) followed by `/`.
fn in_scope(file: &str, scopes: &[String]) -> bool {
    if scopes.is_empty() {
        return true;
    }
    scopes.iter().any(|scope| {
        if scope.is_empty() || file == scope.as_str() {
            return true;
        }
        let root = scope.trim_end_matches('/');
        file.strip_prefix(root)
            .is_some_and(|rest| rest.starts_with('/'))
    })
}
1865
/// Returns the module (parent directory) of `file`, with backslashes
/// normalised to `/`.
///
/// Yields an empty string when the path has no parent or the parent is `.`.
fn module_for_file(file: &str) -> String {
    let Some(directory) = Path::new(file).parent() else {
        return String::new();
    };
    let module = directory.to_string_lossy().replace('\\', "/");
    if module == "." {
        String::new()
    } else {
        module
    }
}
1873
1874fn module_ancestors(module: &str) -> Vec<String> {
1875    let mut out = Vec::new();
1876    let mut current = module;
1877    while !current.is_empty() {
1878        out.push(current.to_string());
1879        current = parent_module(current).unwrap_or("");
1880    }
1881    out
1882}
1883
/// Returns the part of `module` before its last `/`, or `None` when the path
/// contains no `/`.
fn parent_module(module: &str) -> Option<&str> {
    let (parent, _leaf) = module.rsplit_once('/')?;
    Some(parent)
}
1887
/// Reports whether `module` is a strict ancestor of `child`: `module` is
/// non-empty and `child` starts with `module` followed by `/`.
fn module_is_ancestor(module: &str, child: &str) -> bool {
    if module.is_empty() {
        return false;
    }
    child
        .strip_prefix(module)
        .is_some_and(|rest| rest.starts_with('/'))
}
1891
1892fn direct_child_modules<'a>(
1893    module: &str,
1894    candidates: impl Iterator<Item = &'a String>,
1895) -> Vec<String> {
1896    candidates
1897        .filter(|candidate| parent_module(candidate).is_some_and(|parent| parent == module))
1898        .cloned()
1899        .collect()
1900}
1901
/// Returns the number of `/`-separated segments in `module`.
///
/// Empty segments are counted too, so an empty string has depth 1.
fn module_depth(module: &str) -> usize {
    // Each separator adds one segment beyond the first.
    module.matches('/').count() + 1
}
1905
/// Returns the relative doc path for a source file: `files/<file>.md`.
fn file_doc_path(file: &str) -> String {
    ["files/", file, ".md"].concat()
}
1909
/// Returns the relative doc path for a module: `modules/<module>.md`.
fn module_doc_path(module: &str) -> String {
    let mut path = String::from("modules/");
    path.push_str(module);
    path.push_str(".md");
    path
}
1913
/// Returns a wikilink to a file doc, using the file path as the link label:
/// `[[files/<file>|<file>]]`.
fn file_wikilink(file: &str) -> String {
    ["[[files/", file, "|", file, "]]"].concat()
}
1917
/// Returns a wikilink to a module doc, using the module path as the link
/// label: `[[modules/<module>|<module>]]`.
fn module_wikilink(module: &str) -> String {
    let mut link = String::from("[[modules/");
    link.push_str(module);
    link.push('|');
    link.push_str(module);
    link.push_str("]]");
    link
}
1921
1922fn safe_doc_path(out_dir: &Path, relative_path: &str) -> anyhow::Result<PathBuf> {
1923    let path = Path::new(relative_path);
1924    if path.is_absolute()
1925        || path
1926            .components()
1927            .any(|component| matches!(component, std::path::Component::ParentDir))
1928    {
1929        anyhow::bail!("refusing to write unsafe codewiki path: {relative_path}");
1930    }
1931    Ok(out_dir.join(path))
1932}
1933
1934#[cfg(test)]
1935mod tests {
1936    use super::*;
1937
    // Generates docs for a two-file input without a text generator, writes them
    // to a temp directory, and checks the repo, module, and file pages link to
    // each other and include the symbol signature.
    #[test]
    fn generates_hierarchical_docs() {
        let out_dir = tempfile::tempdir().expect("tempdir");
        let input = CodewikiInput {
            files: vec!["src/lib.rs".to_string(), "src/nested/api.rs".to_string()],
            graph_edges: Vec::new(),
            graph_availability: CodewikiGraphAvailability::Available,
            symbols: vec![
                test_symbol("src/lib.rs", "Client", "class", 1, "pub struct Client {"),
                test_symbol("src/lib.rs", "connect", "function", 5, "pub fn connect()"),
                test_symbol(
                    "src/nested/api.rs",
                    "serve",
                    "function",
                    3,
                    "pub fn serve()",
                ),
            ],
        };

        let docs = generate_hierarchical_docs(&input, None);
        write_doc_set(out_dir.path(), &docs).expect("writes docs");

        // Read the written pages back from disk rather than inspecting `docs`.
        let repo = std::fs::read_to_string(out_dir.path().join("repo.md")).expect("repo doc");
        let module =
            std::fs::read_to_string(out_dir.path().join("modules/src.md")).expect("src module doc");
        let file =
            std::fs::read_to_string(out_dir.path().join("files/src/lib.rs.md")).expect("file doc");

        assert!(repo.contains("[[modules/src|src]]"));
        assert!(repo.contains("Repository Overview"));
        assert!(module.contains("[[files/src/lib.rs|src/lib.rs]]"));
        assert!(file.contains("API Symbols"));
        assert!(file.contains("pub struct Client {"));
        assert!(file.contains("[[modules/src|src]]"));
    }
1974
1975    #[test]
1976    fn inline_code_uses_commonmark_backtick_delimiters() {
1977        assert_eq!(inline_code("plain"), "`plain`");
1978        assert_eq!(inline_code("a`b"), "``a`b``");
1979        assert_eq!(inline_code("a``b"), "```a``b```");
1980        assert_eq!(inline_code("`edge`"), "`` `edge` ``");
1981        assert_eq!(inline_code("two\nlines"), "`two lines`");
1982    }
1983
    // With one call edge from the api handler to the domain service, both files
    // are expected on the shared `src` module page, while the tests/ and vendor/
    // files get no file docs.
    #[test]
    fn clusters_modules_from_graph() {
        let input = CodewikiInput {
            files: vec![
                "src/api/handler.rs".to_string(),
                "src/domain/service.rs".to_string(),
                "tests/domain/service_test.rs".to_string(),
                "vendor/generated/client.rs".to_string(),
            ],
            graph_edges: vec![CodewikiGraphEdge::call(
                test_component_id("src/api/handler.rs", "handle", "function"),
                test_component_id("src/domain/service.rs", "Service", "class"),
            )],
            graph_availability: CodewikiGraphAvailability::Available,
            symbols: vec![
                test_symbol(
                    "src/api/handler.rs",
                    "handle",
                    "function",
                    1,
                    "pub fn handle()",
                ),
                test_symbol(
                    "src/domain/service.rs",
                    "Service",
                    "class",
                    1,
                    "pub struct Service;",
                ),
                test_symbol_with_qualified(
                    "src/domain/service.rs",
                    "new",
                    "Service::new",
                    "function",
                    3,
                    "pub fn new() -> Self",
                ),
                test_symbol(
                    "tests/domain/service_test.rs",
                    "service_test",
                    "function",
                    1,
                    "fn service_test()",
                ),
                test_symbol(
                    "vendor/generated/client.rs",
                    "GeneratedClient",
                    "class",
                    1,
                    "pub struct GeneratedClient;",
                ),
            ],
        };

        let docs = generate_hierarchical_docs(&input, None);
        // Index the generated docs by relative path for lookup.
        let docs_by_path = docs.into_iter().collect::<BTreeMap<_, _>>();

        let module = docs_by_path
            .get("modules/src.md")
            .expect("graph-connected files cluster under common module");
        assert!(module.contains("[[files/src/api/handler.rs|src/api/handler.rs]]"));
        assert!(module.contains("[[files/src/domain/service.rs|src/domain/service.rs]]"));
        // The module page must mention both endpoints of the call edge by id.
        assert!(module.contains(&test_component_id(
            "src/api/handler.rs",
            "handle",
            "function"
        )));
        assert!(module.contains(&test_component_id(
            "src/domain/service.rs",
            "Service",
            "class"
        )));
        assert!(!docs_by_path.contains_key("files/tests/domain/service_test.rs.md"));
        assert!(!docs_by_path.contains_key("files/vendor/generated/client.rs.md"));
    }
2059
    // A two-entry parent map where each file points at the other must still
    // resolve: the lookup for "a.rs" returns "a.rs" and leaves both entries
    // pointing at "a.rs".
    #[test]
    fn file_root_detection_breaks_parent_cycles() {
        let mut parents = HashMap::from([
            ("b.rs".to_string(), "a.rs".to_string()),
            ("a.rs".to_string(), "b.rs".to_string()),
        ]);

        let root = find_file_root(&mut parents, "a.rs");

        assert_eq!(root, "a.rs");
        assert_eq!(parents.get("a.rs").map(String::as_str), Some("a.rs"));
        assert_eq!(parents.get("b.rs").map(String::as_str), Some("a.rs"));
    }
2073
    // With the graph marked unavailable and no edges, each source directory is
    // expected to get its own module page, the tests/ file is still excluded,
    // and file docs reference symbols by component id.
    #[test]
    fn clusters_without_falkordb() {
        let input = CodewikiInput {
            files: vec![
                "src/api/handler.rs".to_string(),
                "src/domain/service.rs".to_string(),
                "tests/domain/service_test.rs".to_string(),
            ],
            graph_edges: Vec::new(),
            graph_availability: CodewikiGraphAvailability::Unavailable,
            symbols: vec![
                test_symbol(
                    "src/api/handler.rs",
                    "handle",
                    "function",
                    1,
                    "pub fn handle()",
                ),
                test_symbol(
                    "src/domain/service.rs",
                    "Service",
                    "class",
                    1,
                    "pub struct Service;",
                ),
                test_symbol_with_qualified(
                    "src/domain/service.rs",
                    "new",
                    "Service::new",
                    "function",
                    3,
                    "pub fn new() -> Self",
                ),
                test_symbol(
                    "tests/domain/service_test.rs",
                    "service_test",
                    "function",
                    1,
                    "fn service_test()",
                ),
            ],
        };

        let docs = generate_hierarchical_docs(&input, None);
        let docs_by_path = docs.into_iter().collect::<BTreeMap<_, _>>();

        assert!(docs_by_path.contains_key("modules/src/api.md"));
        assert!(docs_by_path.contains_key("modules/src/domain.md"));
        assert!(!docs_by_path.contains_key("files/tests/domain/service_test.rs.md"));
        assert!(
            docs_by_path
                .get("files/src/api/handler.rs.md")
                .expect("handler file doc")
                .contains(&test_component_id(
                    "src/api/handler.rs",
                    "handle",
                    "function"
                ))
        );
        assert!(
            docs_by_path
                .get("files/src/domain/service.rs.md")
                .expect("service file doc")
                .contains(&test_component_id(
                    "src/domain/service.rs",
                    "Service",
                    "class"
                ))
        );
        assert!(
            docs_by_path
                .get("files/src/domain/service.rs.md")
                .expect("service file doc")
                .contains(&test_component_id(
                    "src/domain/service.rs",
                    "new",
                    "function"
                ))
        );
        // The path-and-qualified-name form must not appear in the file doc.
        assert!(
            !docs_by_path
                .get("files/src/domain/service.rs.md")
                .expect("service file doc")
                .contains("src/domain/service.rs::Service::new")
        );
    }
2160
    // Import edges form the chain api -> domain -> storage, plus a separate
    // unrelated -> storage edge. The api module page is expected to render the
    // chain as a mermaid graph and leave out the unrelated edge.
    #[test]
    fn emits_bounded_mermaid() {
        let input = CodewikiInput {
            files: vec![
                "src/api/handler.rs".to_string(),
                "src/domain/service.rs".to_string(),
                "src/storage/repo.rs".to_string(),
                "src/unrelated/tool.rs".to_string(),
            ],
            graph_edges: vec![
                CodewikiGraphEdge::import(
                    test_component_id("src/api/handler.rs", "handle", "function"),
                    test_component_id("src/domain/service.rs", "Service", "class"),
                ),
                CodewikiGraphEdge::import(
                    test_component_id("src/domain/service.rs", "Service", "class"),
                    test_component_id("src/storage/repo.rs", "Repo", "class"),
                ),
                CodewikiGraphEdge::import(
                    test_component_id("src/unrelated/tool.rs", "Tool", "class"),
                    test_component_id("src/storage/repo.rs", "Repo", "class"),
                ),
            ],
            graph_availability: CodewikiGraphAvailability::Available,
            symbols: vec![
                test_symbol(
                    "src/api/handler.rs",
                    "handle",
                    "function",
                    1,
                    "pub fn handle()",
                ),
                test_symbol(
                    "src/domain/service.rs",
                    "Service",
                    "class",
                    1,
                    "pub struct Service;",
                ),
                test_symbol(
                    "src/storage/repo.rs",
                    "Repo",
                    "class",
                    1,
                    "pub struct Repo;",
                ),
                test_symbol(
                    "src/unrelated/tool.rs",
                    "Tool",
                    "class",
                    1,
                    "pub struct Tool;",
                ),
            ],
        };

        let docs = generate_hierarchical_docs(&input, None);
        let docs_by_path = docs.into_iter().collect::<BTreeMap<_, _>>();
        let rendered = docs_by_path
            .get("modules/src/api.md")
            .expect("api module doc");

        assert!(rendered.contains("```mermaid"));
        assert!(rendered.contains("graph LR"));
        assert!(rendered.contains("m_src_api[\"src/api\"] --> m_src_domain[\"src/domain\"]"));
        assert!(
            rendered.contains("m_src_domain[\"src/domain\"] --> m_src_storage[\"src/storage\"]")
        );
        // This edge does not start from anything on the api chain.
        assert!(
            !rendered
                .contains("m_src_unrelated[\"src/unrelated\"] --> m_src_storage[\"src/storage\"]")
        );
    }
2234
    // When the graph is marked unavailable, module and file docs must still be
    // generated, and the module page must carry the `degraded: graph-unavailable`
    // marker.
    #[test]
    fn mermaid_degrades_without_falkordb() {
        let input = CodewikiInput {
            files: vec!["src/api/handler.rs".to_string()],
            graph_edges: Vec::new(),
            graph_availability: CodewikiGraphAvailability::Unavailable,
            symbols: vec![test_symbol(
                "src/api/handler.rs",
                "handle",
                "function",
                1,
                "pub fn handle()",
            )],
        };

        let docs = generate_hierarchical_docs(&input, None);
        let docs_by_path = docs.into_iter().collect::<BTreeMap<_, _>>();
        let module = docs_by_path
            .get("modules/src/api.md")
            .expect("module doc still renders");
        let file = docs_by_path
            .get("files/src/api/handler.rs.md")
            .expect("file doc still renders");

        assert!(module.contains("degraded: graph-unavailable"));
        assert!(file.contains("API Symbols"));
        assert!(file.contains(&test_component_id(
            "src/api/handler.rs",
            "handle",
            "function"
        )));
    }
2267
    // Counterpart to the unavailable-graph case: an available graph that simply
    // has no edges must not be reported as degraded.
    #[test]
    fn empty_available_graph_does_not_emit_degradation_marker() {
        let input = CodewikiInput {
            files: vec!["src/api/handler.rs".to_string()],
            graph_edges: Vec::new(),
            graph_availability: CodewikiGraphAvailability::Available,
            symbols: vec![test_symbol(
                "src/api/handler.rs",
                "handle",
                "function",
                1,
                "pub fn handle()",
            )],
        };

        let docs = generate_hierarchical_docs(&input, None);
        let docs_by_path = docs.into_iter().collect::<BTreeMap<_, _>>();
        let module = docs_by_path
            .get("modules/src/api.md")
            .expect("module doc still renders");

        assert!(!module.contains("degraded: graph-unavailable"));
    }
2291
    // Uses a stub text generator that returns summaries with bracketed
    // `[file:line]` citations. The file doc must keep the citation that falls
    // within a known symbol range and drop the ones pointing at an out-of-range
    // line or an unknown file.
    #[test]
    fn citations_validated_against_spans() {
        let input = CodewikiInput {
            files: vec!["src/lib.rs".to_string()],
            graph_edges: Vec::new(),
            graph_availability: CodewikiGraphAvailability::Available,
            symbols: vec![
                test_symbol_range(
                    "src/lib.rs",
                    "Client",
                    "class",
                    10,
                    14,
                    "pub struct Client {",
                ),
                test_symbol_range(
                    "src/lib.rs",
                    "connect",
                    "function",
                    20,
                    24,
                    "pub fn connect()",
                ),
            ],
        };
        // The stub picks its canned response based on what the prompt mentions.
        let mut generator = |prompt: &str, _system: &str| {
            if prompt.contains("Client") {
                // Line 999 is outside both symbol ranges.
                Some("Builds client state [src/lib.rs:999].".to_string())
            } else if prompt.contains("connect") {
                // Line 20 falls inside the `connect` range 20-24.
                Some("Opens a connection [src/lib.rs:20].".to_string())
            } else {
                // `missing.rs` is not one of the input files.
                Some("Coordinates the public API [missing.rs:1].".to_string())
            }
        };

        let docs = generate_hierarchical_docs(&input, Some(&mut generator));
        let file_doc = docs
            .iter()
            .find(|(path, _)| path == "files/src/lib.rs.md")
            .map(|(_, content)| content)
            .expect("file doc");

        // The front matter lists the source file with both symbol ranges.
        assert!(file_doc.contains("source_files:\n"));
        assert!(file_doc.contains("  - file: \"src/lib.rs\"\n"));
        assert!(file_doc.contains("    ranges:\n"));
        assert!(file_doc.contains("      - \"10-14\"\n"));
        assert!(file_doc.contains("      - \"20-24\"\n"));
        assert!(file_doc.contains("[src/lib.rs:10-14]"));
        assert!(file_doc.contains("[src/lib.rs:20]"));
        assert!(!file_doc.contains("src/lib.rs:999"));
        assert!(!file_doc.contains("missing.rs:1"));
    }
2344
    // Runs the incremental writer three times against a temp project:
    //   1. first write: every doc is written;
    //   2. after `src/lib.rs` changes on disk: only the repo page, the `src`
    //      module page, and that file's page are rewritten, and a hand-edited doc
    //      for the untouched file is left alone;
    //   3. after a file is dropped from the input: its stale doc and its entry in
    //      the meta log are removed.
    #[test]
    fn incremental_regenerates_only_changed() {
        let project = tempfile::tempdir().expect("project tempdir");
        std::fs::create_dir_all(project.path().join("src/nested")).expect("source dirs");
        std::fs::write(project.path().join("src/lib.rs"), "pub struct Client;\n")
            .expect("write lib");
        std::fs::write(
            project.path().join("src/nested/api.rs"),
            "pub fn serve() {}\n",
        )
        .expect("write api");
        let out_dir = project.path().join("codewiki");

        let input = CodewikiInput {
            files: vec!["src/lib.rs".to_string(), "src/nested/api.rs".to_string()],
            graph_edges: Vec::new(),
            graph_availability: CodewikiGraphAvailability::Available,
            symbols: vec![
                test_symbol("src/lib.rs", "Client", "class", 1, "pub struct Client;"),
                test_symbol(
                    "src/nested/api.rs",
                    "serve",
                    "function",
                    1,
                    "pub fn serve()",
                ),
            ],
        };

        let first_docs = generate_hierarchical_docs(&input, None);
        let first_written =
            write_incremental_doc_set(project.path(), &out_dir, &first_docs).expect("first write");
        assert!(first_written.contains(&"repo.md".to_string()));
        assert!(first_written.contains(&"modules/src.md".to_string()));
        assert!(first_written.contains(&"files/src/lib.rs.md".to_string()));
        assert!(first_written.contains(&"files/src/nested/api.rs.md".to_string()));

        // Append a marker to the doc whose source will not change; if the second
        // write rewrote this doc, the marker would be lost.
        let unchanged_file_doc = out_dir.join("files/src/nested/api.rs.md");
        let mut unchanged_content =
            std::fs::read_to_string(&unchanged_file_doc).expect("unchanged doc content");
        unchanged_content.push_str("\n<!-- preserve unchanged doc -->\n");
        std::fs::write(&unchanged_file_doc, unchanged_content).expect("write unchanged marker");

        // Change only the on-disk source of `src/lib.rs`; the generator input is
        // reused as-is.
        std::fs::write(
            project.path().join("src/lib.rs"),
            "pub struct Client;\npub fn connect() {}\n",
        )
        .expect("modify lib");
        let changed_docs = generate_hierarchical_docs(&input, None);
        let changed_written = write_incremental_doc_set(project.path(), &out_dir, &changed_docs)
            .expect("incremental write");
        let unchanged_after =
            std::fs::read_to_string(&unchanged_file_doc).expect("unchanged doc after content");

        assert!(unchanged_after.contains("preserve unchanged doc"));
        assert_eq!(
            changed_written,
            vec![
                "repo.md".to_string(),
                "modules/src.md".to_string(),
                "files/src/lib.rs.md".to_string()
            ]
        );
        // The meta log must record the same set of regenerated docs.
        let meta =
            std::fs::read_to_string(out_dir.join("_meta/codewiki.json")).expect("read meta log");
        let meta: serde_json::Value = serde_json::from_str(&meta).expect("parse meta log");
        let generated_docs = meta["generated_docs"].as_array().expect("generated docs");
        assert_eq!(
            generated_docs,
            &vec![
                serde_json::Value::String("repo.md".to_string()),
                serde_json::Value::String("modules/src.md".to_string()),
                serde_json::Value::String("files/src/lib.rs.md".to_string())
            ]
        );

        // Drop `src/nested/api.rs` from the input; its doc is now stale.
        let reduced_input = CodewikiInput {
            files: vec!["src/lib.rs".to_string()],
            graph_edges: Vec::new(),
            graph_availability: CodewikiGraphAvailability::Available,
            symbols: vec![test_symbol(
                "src/lib.rs",
                "Client",
                "class",
                1,
                "pub struct Client;",
            )],
        };
        let reduced_docs = generate_hierarchical_docs(&reduced_input, None);
        write_incremental_doc_set(project.path(), &out_dir, &reduced_docs)
            .expect("stale docs removed");

        assert!(!unchanged_file_doc.exists());
        let meta =
            std::fs::read_to_string(out_dir.join("_meta/codewiki.json")).expect("read final meta");
        let meta: serde_json::Value = serde_json::from_str(&meta).expect("parse final meta");
        assert!(meta["docs"].get("files/src/nested/api.rs.md").is_none());
    }
2443
    // Serializes a run summary to JSON and pins the key names and values that
    // the test name identifies as the daemon contract.
    #[test]
    fn run_summary_serializes_daemon_contract_keys() {
        let summary = CodewikiRunSummary {
            command: "codewiki",
            project_id: "project-1".to_string(),
            project_root: "/repo".to_string(),
            out_dir: "/repo/codewiki".to_string(),
            generated_pages: 3,
            changed_paths: vec!["repo.md".to_string()],
            skipped: 2,
            files: 1,
            modules: 1,
            symbols: 4,
            ai_enabled: false,
        };

        let value = serde_json::to_value(summary).expect("summary json");

        assert_eq!(value["command"], "codewiki");
        assert_eq!(value["project_id"], "project-1");
        assert_eq!(value["project_root"], "/repo");
        assert_eq!(value["changed_paths"][0], "repo.md");
        assert_eq!(value["skipped"], 2);
        assert_eq!(value["ai_enabled"], false);
    }
2469
2470    #[test]
2471    fn component_id_uses_stored_symbol_id() {
2472        let mut symbol = test_symbol("src/lib.rs", "Client", "class", 1, "pub struct Client;");
2473        symbol.id = "stored-symbol-id".to_string();
2474        assert_eq!(component_id(&symbol), "stored-symbol-id");
2475    }
2476
    // Places a symlink named `linked` inside the output directory that points at
    // a separate temp directory, then checks that writing through it fails and
    // nothing is created on the far side of the link.
    #[test]
    #[cfg(unix)]
    fn write_doc_rejects_symlinked_parent() {
        use std::os::unix::fs::symlink;

        let project = tempfile::tempdir().expect("project tempdir");
        let out_dir = project.path().join("codewiki");
        let outside = tempfile::tempdir().expect("outside tempdir");
        std::fs::create_dir_all(&out_dir).expect("out dir");
        symlink(outside.path(), out_dir.join("linked")).expect("symlink parent");

        let err = write_doc(&out_dir, "linked/escape.md", "escaped")
            .expect_err("symlink parent should be rejected");

        assert!(err.to_string().contains("symlinked codewiki path"));
        assert!(!outside.path().join("escape.md").exists());
    }
2494
    // Makes the doc path itself a symlink to a file outside the output directory
    // (the link target does not exist yet), then checks that the write fails and
    // the outside file is never created.
    #[test]
    #[cfg(unix)]
    fn write_doc_rejects_symlinked_target() {
        use std::os::unix::fs::symlink;

        let project = tempfile::tempdir().expect("project tempdir");
        let out_dir = project.path().join("codewiki");
        let outside = tempfile::tempdir().expect("outside tempdir");
        std::fs::create_dir_all(&out_dir).expect("out dir");
        let outside_target = outside.path().join("target.md");
        symlink(&outside_target, out_dir.join("target.md")).expect("symlink target");

        let err = write_doc(&out_dir, "target.md", "escaped").expect_err("symlink target rejected");

        assert!(err.to_string().contains("symlinked codewiki path"));
        assert!(!outside_target.exists());
    }
2512
2513    fn test_symbol(
2514        file_path: &str,
2515        name: &str,
2516        kind: &str,
2517        line_start: usize,
2518        signature: &str,
2519    ) -> Symbol {
2520        test_symbol_with_qualified(file_path, name, name, kind, line_start, signature)
2521    }
2522
2523    fn test_component_id(file_path: &str, name: &str, kind: &str) -> String {
2524        Symbol::make_id("project-1", file_path, name, kind, 0)
2525    }
2526
2527    fn test_symbol_with_qualified(
2528        file_path: &str,
2529        name: &str,
2530        qualified_name: &str,
2531        kind: &str,
2532        line_start: usize,
2533        signature: &str,
2534    ) -> Symbol {
2535        Symbol {
2536            id: Symbol::make_id("project-1", file_path, name, kind, 0),
2537            project_id: "project-1".to_string(),
2538            file_path: file_path.to_string(),
2539            name: name.to_string(),
2540            qualified_name: qualified_name.to_string(),
2541            kind: kind.to_string(),
2542            language: "rust".to_string(),
2543            byte_start: 0,
2544            byte_end: 0,
2545            line_start,
2546            line_end: line_start,
2547            signature: Some(signature.to_string()),
2548            docstring: None,
2549            parent_symbol_id: None,
2550            content_hash: String::new(),
2551            summary: None,
2552            created_at: String::new(),
2553            updated_at: String::new(),
2554        }
2555    }
2556
2557    fn test_symbol_range(
2558        file_path: &str,
2559        name: &str,
2560        kind: &str,
2561        line_start: usize,
2562        line_end: usize,
2563        signature: &str,
2564    ) -> Symbol {
2565        Symbol {
2566            id: Symbol::make_id("project-1", file_path, name, kind, 0),
2567            project_id: "project-1".to_string(),
2568            file_path: file_path.to_string(),
2569            name: name.to_string(),
2570            qualified_name: name.to_string(),
2571            kind: kind.to_string(),
2572            language: "rust".to_string(),
2573            byte_start: 0,
2574            byte_end: 0,
2575            line_start,
2576            line_end,
2577            signature: Some(signature.to_string()),
2578            docstring: None,
2579            parent_symbol_id: None,
2580            content_hash: String::new(),
2581            summary: None,
2582            created_at: String::new(),
2583            updated_at: String::new(),
2584        }
2585    }
2586}