use crate::generator::preprocess::memory::{MemoryScope, ScopedKeys};
use crate::generator::research::memory::MemoryRetriever;
use crate::generator::research::types::{
AgentType, DomainModule, DomainModulesReport, KeyModuleReport, SubModule,
};
use crate::generator::{
agent_executor::{AgentExecuteParams, extract},
context::GeneratorContext,
step_forward_agent::{
AgentDataConfig, DataSource, FormatterConfig, LLMCallMode, PromptTemplate, StepForwardAgent,
},
};
use crate::types::code::CodeInsight;
use crate::utils::threads::do_parallel_with_limit;
use anyhow::{Result, anyhow};
use async_trait::async_trait;
use std::collections::HashSet;
#[derive(Default, Clone)]
pub struct KeyModulesInsight;
#[async_trait]
impl StepForwardAgent for KeyModulesInsight {
type Output = Vec<KeyModuleReport>;
fn agent_type(&self) -> String {
AgentType::KeyModulesInsight.to_string()
}
fn agent_type_enum(&self) -> Option<AgentType> {
Some(AgentType::KeyModulesInsight)
}
fn memory_scope_key(&self) -> String {
crate::generator::research::memory::MemoryScope::STUDIES_RESEARCH.to_string()
}
fn data_config(&self) -> AgentDataConfig {
AgentDataConfig {
required_sources: vec![
DataSource::ResearchResult(AgentType::SystemContextResearcher.to_string()),
DataSource::ResearchResult(AgentType::DomainModulesDetector.to_string()),
],
optional_sources: vec![DataSource::knowledge_categories(vec!["architecture", "database"])],
}
}
fn prompt_template(&self) -> PromptTemplate {
PromptTemplate {
system_prompt: r#"You are a software development expert. Based on the information provided by the user, investigate the technical details of core modules.
You may have access to existing product description, requirements and architecture documentation from external sources.
If available:
- Reference documented component responsibilities and interfaces
- Validate implementation against documented design patterns
- Use established terminology for components and modules
- Identify any gaps between documented and actual component behavior
- Incorporate design rationale and constraints from the documentation"#
.to_string(),
opening_instruction: "Based on the following project information and research materials, analyze the core modules:".to_string(),
closing_instruction: "".to_string(),
llm_call_mode: LLMCallMode::Extract,
formatter_config: FormatterConfig::default(),
}
}
async fn execute(&self, context: &GeneratorContext) -> Result<Self::Output> {
let reports = self.execute_multi_domain_analysis(context).await?;
let value = serde_json::to_value(&reports)?;
context
.store_to_memory(&self.memory_scope_key(), &self.agent_type(), value.clone())
.await?;
Ok(reports)
}
}
impl KeyModulesInsight {
async fn execute_multi_domain_analysis(
&self,
context: &GeneratorContext,
) -> Result<Vec<KeyModuleReport>> {
println!("🔍 Starting multi-domain module analysis...");
let mut reports = vec![];
let max_parallels = context.config.llm.max_parallels;
let domain_modules = self.get_domain_modules(context).await?;
if domain_modules.is_empty() {
return Err(anyhow!("No domain module data found"));
}
let domain_names: Vec<String> = domain_modules.iter().map(|d| d.name.clone()).collect();
println!(
"📋 Discovered {} domain modules: {}",
domain_modules.len(),
domain_names.join(", ")
);
println!("🚀 Starting concurrent analysis, max parallelism: {}", max_parallels);
let analysis_futures: Vec<_> = domain_modules
.iter()
.map(|domain| {
let domain_clone = domain.clone();
let context_clone = context.clone();
Box::pin(async move {
let key_modules_insight = KeyModulesInsight::default();
let result = key_modules_insight
.analyze_single_domain(&domain_clone, &context_clone)
.await;
(domain_clone.name.clone(), result)
})
})
.collect();
let analysis_results = do_parallel_with_limit(analysis_futures, max_parallels).await;
let mut successful_analyses = 0;
for (domain_name, result) in analysis_results {
match result {
Ok(report) => {
let storage_key = format!("{}_{}", self.agent_type(), domain_name);
context
.store_research(&storage_key, serde_json::to_value(&report)?)
.await?;
successful_analyses += 1;
reports.push(report);
println!("✅ Domain module analysis: {} completed and stored", domain_name);
}
Err(e) => {
let msg = context.config.target_language.msg_domain_analysis_failed();
println!("{}", msg.replace("{}", &domain_name).replace("{}", &e.to_string()));
}
}
}
if successful_analyses == 0 {
return Err(anyhow!("All domain analyses failed"));
}
Ok(reports)
}
}
impl KeyModulesInsight {
async fn get_domain_modules(&self, context: &GeneratorContext) -> Result<Vec<DomainModule>> {
let domain_report = context
.get_research(&AgentType::DomainModulesDetector.to_string())
.await
.ok_or_else(|| anyhow!("DomainModulesDetector result is not available"))?;
let domain_modules_report: DomainModulesReport = serde_json::from_value(domain_report)?;
Ok(domain_modules_report.domain_modules)
}
async fn filter_code_insights_for_domain(
&self,
domain: &DomainModule,
context: &GeneratorContext,
) -> Result<Vec<CodeInsight>> {
let all_insights = context
.get_from_memory::<Vec<CodeInsight>>(MemoryScope::PREPROCESS, ScopedKeys::CODE_INSIGHTS)
.await
.expect("memory of CODE_INSIGHTS not found in PREPROCESS");
let mut domain_paths: HashSet<String> = HashSet::new();
for path in &domain.code_paths {
domain_paths.insert(path.clone());
}
for sub in &domain.sub_modules {
for path in &sub.code_paths {
domain_paths.insert(path.clone());
}
}
if domain_paths.is_empty() {
let msg = context.config.target_language.msg_no_code_path_for_domain();
println!("{}", msg.replace("{}", &domain.name));
return Ok(Vec::new());
}
let filtered: Vec<CodeInsight> = all_insights
.into_iter()
.filter(|insight| {
let file_path = insight.code_dossier.file_path.to_string_lossy();
let file_path = file_path.replace('\\', "/");
domain_paths.iter().any(|path| {
let path = path.replace('\\', "/");
file_path.contains(&path) || path.contains(&file_path)
})
})
.take(50)
.collect();
println!(
"📁 Filtered {} related code files for domain '{}'",
filtered.len(),
domain.name
);
Ok(filtered)
}
async fn analyze_single_domain(
&self,
domain: &DomainModule,
context: &GeneratorContext,
) -> Result<KeyModuleReport> {
let filtered_insights = self
.filter_code_insights_for_domain(domain, context)
.await?;
let (system_prompt, user_prompt) = self.build_domain_prompt(domain, &filtered_insights);
let params = AgentExecuteParams {
prompt_sys: system_prompt,
prompt_user: user_prompt,
cache_scope: format!(
"{}/{}/{}",
crate::generator::research::memory::MemoryScope::STUDIES_RESEARCH.to_string(),
self.agent_type(),
domain.name
),
log_tag: format!("{} domain analysis", domain.name),
};
println!("🤖 Analyzing '{}' domain...", domain.name);
let mut report: KeyModuleReport = extract(context, params).await?;
report.domain_name = domain.name.clone();
if report.module_name.is_empty() {
report.module_name = format!("{} Core Module", domain.name);
}
println!("✅ '{}' domain analysis completed", domain.name);
Ok(report)
}
fn build_domain_prompt(
&self,
domain: &DomainModule,
insights: &[CodeInsight],
) -> (String, String) {
let system_prompt =
"Based on the provided domain and code insights, conduct in-depth analysis and return strict JSON only.
Output requirements (no markdown, no code fences, no prose outside JSON):
{
\"domain_name\": \"string\",
\"module_name\": \"string\",
\"module_description\": \"string\",
\"interaction\": \"string\",
\"implementation\": \"string\",
\"associated_files\": [\"string\"],
\"flowchart_mermaid\": \"string\",
\"sequence_diagram_mermaid\": \"string\"
}
Rules:
- Include all fields every time.
- Use plain strings for all textual fields.
- associated_files must be an array of strings.
- If uncertain, use empty strings/empty array.
- Mermaid fields must be valid mermaid text or empty string.
"
.to_string();
let user_prompt = format!(
"## Domain Analysis Task\nAnalyze the core module technical details of the '{}' domain\n\n### Domain Information\n- Domain Name: {}\n- Domain Type: {}\n- Importance: {:.1}/10\n- Complexity: {:.1}/10\n- Description: {}\n\n### Submodule Overview\n{}\n\n### Related Code Insights\n{}\n",
domain.name,
domain.name,
domain.domain_type,
domain.importance,
domain.complexity,
domain.description,
self.format_sub_modules(&domain.sub_modules),
self.format_filtered_insights(insights)
);
(system_prompt, user_prompt)
}
fn format_sub_modules(&self, sub_modules: &[SubModule]) -> String {
if sub_modules.is_empty() {
return "No submodule information available".to_string();
}
sub_modules.iter()
.enumerate()
.map(|(i, sub)| format!(
"{}. **{}**\n - Description: {}\n - Importance: {:.1}/10\n - Core Functions: {}\n - Code Files: {}",
i + 1,
sub.name,
sub.description,
sub.importance,
sub.key_functions.join(", "),
sub.code_paths.join(", ")
))
.collect::<Vec<_>>()
.join("\n\n")
}
fn format_filtered_insights(&self, insights: &[CodeInsight]) -> String {
if insights.is_empty() {
return "No related code insights available".to_string();
}
insights
.iter()
.enumerate()
.map(|(i, insight)| {
format!(
"{}. File `{}`, Purpose: {}\n Description: {}\n Source Code\n```code\n{}```\n---\n",
i + 1,
insight.code_dossier.file_path.to_string_lossy(),
insight.code_dossier.code_purpose,
insight.detailed_description,
insight.code_dossier.source_summary
)
})
.collect::<Vec<_>>()
.join("\n")
}
}