use super::super::*;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
#[allow(clippy::too_many_arguments)]
#[cfg(test)]
pub(crate) fn build_module_docs(
files: &[FileDoc],
leading_chunks: &BTreeMap<String, LeadingChunk>,
generate: &mut Option<&mut TextGenerator<'_>>,
reuse: &mut Option<&mut ReusePlan>,
progress: &mut CodewikiProgress,
emit: &mut dyn FnMut(&ModuleDoc) -> anyhow::Result<()>,
) -> anyhow::Result<Vec<ModuleDoc>> {
build_module_docs_with_filter(
files,
leading_chunks,
&[],
generate,
reuse,
progress,
&|_: &str| true,
emit,
)
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn build_module_docs_with_filter(
files: &[FileDoc],
leading_chunks: &BTreeMap<String, LeadingChunk>,
graph_edges: &[CodewikiGraphEdge],
generate: &mut Option<&mut TextGenerator<'_>>,
reuse: &mut Option<&mut ReusePlan>,
progress: &mut CodewikiProgress,
module_filter: &dyn Fn(&str) -> bool,
emit: &mut dyn FnMut(&ModuleDoc) -> anyhow::Result<()>,
) -> anyhow::Result<Vec<ModuleDoc>> {
let symbols_by_id = files
.iter()
.flat_map(|file| {
file.symbols
.iter()
.map(|doc| (doc.symbol.id.as_str(), &doc.symbol))
})
.collect::<HashMap<_, _>>();
let mut module_names = BTreeSet::new();
for file in files {
for module in module_ancestors(&file.module) {
module_names.insert(module);
}
for module in module_ancestors(&module_for_file(&file.path)) {
module_names.insert(module);
}
}
let mut module_summaries: BTreeMap<String, String> = BTreeMap::new();
let mut module_sources: BTreeMap<String, Vec<SourceSpan>> = BTreeMap::new();
let mut modules = module_names
.into_iter()
.filter(|module| module_filter(module))
.collect::<Vec<_>>();
modules.sort_by_key(|module| std::cmp::Reverse(module_depth(module)));
let mut docs = Vec::new();
let module_total = modules.len();
for (index, module) in modules.into_iter().enumerate() {
let mut seen_direct_files = BTreeSet::new();
let direct_files = files
.iter()
.filter(|file| {
file_is_direct_module_member(file, &module)
&& seen_direct_files.insert(file.path.clone())
})
.map(|file| FileLink {
path: file.path.clone(),
summary: file.summary.clone(),
source_spans: file.source_spans.clone(),
})
.collect::<Vec<_>>();
let child_modules = direct_child_modules(&module, module_summaries.keys())
.into_iter()
.map(|child| ModuleLink {
summary: module_summaries.get(&child).cloned().unwrap_or_default(),
source_spans: module_sources.get(&child).cloned().unwrap_or_default(),
module: child,
})
.collect::<Vec<_>>();
let file_summaries = direct_files
.iter()
.map(|file| prompts::ChildSummary {
name: file.path.clone(),
summary: file.summary.clone(),
})
.collect::<Vec<_>>();
let child_summaries = child_modules
.iter()
.map(|module| prompts::ChildSummary {
name: module.module.clone(),
summary: module.summary.clone(),
})
.collect::<Vec<_>>();
let prompt_component_ids = prompt_component_ids_for_module(files, &module);
let fallback = structural_module_summary(&module, &direct_files, &child_modules);
let source_spans = collect_link_spans(&direct_files, &child_modules);
let reused = reuse.as_deref_mut().and_then(|plan| {
plan.reusable_page_with_summary(&module_doc_path(&module), &span_files(&source_spans))
});
progress.emit(format!(
"{} module doc module {}/{} {}",
if reused.is_some() {
"reusing"
} else {
"generating"
},
index + 1,
module_total,
module
));
let mut degraded = false;
let (summary, reused_page) = match reused {
Some((page, summary)) => (summary, Some(page)),
None => {
let sources = ranked_source_excerpts(
files
.iter()
.filter(|file| file_is_direct_module_member(file, &module)),
leading_chunks,
prompts::MAX_PROMPT_SOURCE_EXCERPTS,
);
let member_ids = files
.iter()
.filter(|file| {
file.module == module || module_is_ancestor(&module, &file.module)
})
.flat_map(|file| file.component_ids.iter().map(String::as_str))
.collect::<HashSet<&str>>();
let relationships =
relationship_facts_for_file(&module, &member_ids, &symbols_by_id, graph_edges);
let generated = maybe_generate(
generate,
&prompts::module_prompt(
&module,
&file_summaries,
&child_summaries,
&prompt_component_ids,
&sources,
&relationships,
),
prompts::MODULE_SYSTEM,
PromptTier::Module,
)
.unwrap_or_record(fallback, &mut degraded);
let mut grounding_spans = source_spans.clone();
grounding_spans.extend(relationships.endpoint_spans());
let citations = citation_list(&grounding_spans, &generated);
let summary = ground_text(&generated, &grounding_spans, Some(&citations));
(summary, None)
}
};
module_summaries.insert(module.clone(), summary.clone());
module_sources.insert(module.clone(), source_spans.clone());
let mut degraded_sources = BTreeSet::new();
if degraded {
degraded_sources.insert("model-unavailable".to_string());
}
let doc = ModuleDoc {
module,
summary,
source_spans,
direct_files,
child_modules,
degraded,
degraded_sources: degraded_sources.into_iter().collect(),
verify_notes: Vec::new(),
reused_page,
};
emit(&doc)?;
docs.push(doc);
}
docs.sort_by(|a, b| a.module.cmp(&b.module));
Ok(docs)
}
fn file_is_direct_module_member(file: &FileDoc, module: &str) -> bool {
file.module == module || module_for_file(&file.path) == module
}
pub(super) fn prompt_component_ids_for_module(files: &[FileDoc], module: &str) -> Vec<String> {
files
.iter()
.filter(|file| file.module == module || module_is_ancestor(module, &file.module))
.flat_map(|file| {
file.symbols
.iter()
.map(|symbol| format!("{} ({})", symbol.component_label, symbol.component_id))
})
.collect()
}