use anyhow::{Result, anyhow};
use async_trait::async_trait;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::generator::agent_executor::{AgentExecuteParams, extract, prompt, prompt_with_tools};
use crate::generator::preprocess::memory::{MemoryScope, ScopedKeys};
use crate::generator::research::memory::MemoryRetriever;
use crate::{
generator::context::GeneratorContext,
types::{
code::CodeInsight, code_releationship::RelationshipAnalysis,
project_structure::ProjectStructure,
},
utils::project_structure_formatter::ProjectStructureFormatter,
utils::prompt_compressor::{CompressionConfig, PromptCompressor},
};
/// Substitutes the time placeholders in `content` with the current UTC time.
///
/// * `__CURRENT_UTC_TIME__` becomes `YYYY-MM-DD HH:MM:SS (UTC)`.
/// * `__CURRENT_TIMESTAMP__` becomes the value of chrono's `timestamp()`.
///
/// Both values come from a single clock reading, so they always describe the
/// same instant. Text without placeholders is returned unchanged.
pub fn replace_time_placeholders(content: &str) -> String {
    let now = chrono::Utc::now();
    let utc_text = format!("{} (UTC)", now.format("%Y-%m-%d %H:%M:%S"));
    let unix_text = now.timestamp().to_string();

    let with_utc_time = content.replace("__CURRENT_UTC_TIME__", &utc_text);
    with_utc_time.replace("__CURRENT_TIMESTAMP__", &unix_text)
}
/// A piece of input material that can be pulled into an agent's prompt.
#[derive(Debug, Clone, PartialEq)]
pub enum DataSource {
/// A value stored in the generator memory, addressed by `scope` and `key`.
/// The prompt builder only renders the keys it knows how to format
/// (project structure, code insights, original document, relationships).
MemoryData {
scope: &'static str,
key: &'static str,
},
/// The stored research result of another agent; the string is the
/// identifier passed to `GeneratorContext::get_research`.
ResearchResult(String),
/// External knowledge selected by the listed category names.
ExternalKnowledgeByCategory(Vec<String>),
}
impl DataSource {
    /// Project structure stored in the preprocess scope.
    pub const PROJECT_STRUCTURE: Self = Self::MemoryData {
        scope: MemoryScope::PREPROCESS,
        key: ScopedKeys::PROJECT_STRUCTURE,
    };

    /// Per-file code insights stored in the preprocess scope.
    pub const CODE_INSIGHTS: Self = Self::MemoryData {
        scope: MemoryScope::PREPROCESS,
        key: ScopedKeys::CODE_INSIGHTS,
    };

    /// Relationship (dependency) analysis stored in the preprocess scope.
    pub const DEPENDENCY_ANALYSIS: Self = Self::MemoryData {
        scope: MemoryScope::PREPROCESS,
        key: ScopedKeys::RELATIONSHIPS,
    };

    /// Original document (README) text stored in the preprocess scope.
    pub const README_CONTENT: Self = Self::MemoryData {
        scope: MemoryScope::PREPROCESS,
        key: ScopedKeys::ORIGINAL_DOCUMENT,
    };

    /// Builds an [`DataSource::ExternalKnowledgeByCategory`] source from
    /// borrowed category names, copying each name into an owned `String`.
    pub fn knowledge_categories(categories: Vec<&str>) -> DataSource {
        let owned_names: Vec<String> = categories.into_iter().map(String::from).collect();
        Self::ExternalKnowledgeByCategory(owned_names)
    }
}
/// Declares which data sources an agent consumes.
#[derive(Debug, Clone)]
pub struct AgentDataConfig {
/// Sources checked before the agent runs; a missing memory entry or
/// research result aborts execution with an error.
pub required_sources: Vec<DataSource>,
/// Sources added to the prompt when present and skipped otherwise.
pub optional_sources: Vec<DataSource>,
}
/// Selects which LLM entry point `StepForwardAgent::execute` uses.
#[derive(Debug, Clone, PartialEq)]
pub enum LLMCallMode {
/// Calls `extract`, which yields the agent's typed output directly.
Extract,
/// Calls `prompt`, which yields free text; time placeholders in the text
/// are replaced afterwards.
// NOTE(review): presumably no agent constructs this variant at the moment,
// hence the lint allowance - confirm before removing it.
#[allow(dead_code)]
Prompt,
/// Calls `prompt_with_tools`, which yields free text; time placeholders in
/// the text are replaced afterwards.
PromptWithTools,
}
/// Tuning knobs for how `DataFormatter` renders data into prompt text.
#[derive(Debug, Clone)]
pub struct FormatterConfig {
/// When `Some(n)` and the project has more than `n` files, the project
/// structure is rendered as a directory-only tree.
pub only_directories_when_files_more_than: Option<usize>,
/// Maximum number of code insights listed (highest importance first).
pub code_insights_limit: usize,
/// Whether each listed insight also includes its source summary.
pub include_source_code: bool,
/// Maximum number of dependency relations listed.
pub dependency_limit: usize,
/// When `Some(n)`, README text longer than `n` bytes is truncated.
pub readme_truncate_length: Option<usize>,
/// Whether a `PromptCompressor` is created for the formatter.
pub enable_compression: bool,
/// Settings handed to the `PromptCompressor` when compression is enabled.
pub compression_config: CompressionConfig,
}
/// Default formatting: up to 50 insights and 50 dependencies, no source code,
/// README capped at 16384 bytes, compression enabled with default settings,
/// and the full (file-level) project tree regardless of project size.
impl Default for FormatterConfig {
    fn default() -> Self {
        let compression_config = CompressionConfig::default();
        Self {
            only_directories_when_files_more_than: None,
            code_insights_limit: 50,
            include_source_code: false,
            dependency_limit: 50,
            readme_truncate_length: Some(16384),
            enable_compression: true,
            compression_config,
        }
    }
}
/// Static parts of an agent's prompt plus the settings used to assemble it.
#[derive(Debug, Clone)]
pub struct PromptTemplate {
/// System prompt, passed through unchanged by the prompt builder.
pub system_prompt: String,
/// Text placed at the very beginning of the user prompt.
pub opening_instruction: String,
/// Text appended after all research material in the user prompt.
pub closing_instruction: String,
/// Which LLM entry point the agent uses.
pub llm_call_mode: LLMCallMode,
/// Formatting settings for the data sections of the user prompt.
pub formatter_config: FormatterConfig,
}
/// Renders memory data into Markdown prompt sections and optionally
/// compresses the rendered text.
pub struct DataFormatter {
// Formatting limits and switches.
config: FormatterConfig,
// Present only when `config.enable_compression` was true at construction.
prompt_compressor: Option<PromptCompressor>,
}
impl DataFormatter {
/// Creates a formatter from `config`.
///
/// A [`PromptCompressor`] is built from `config.compression_config` only when
/// `config.enable_compression` is set; otherwise no compressor is kept.
pub fn new(config: FormatterConfig) -> Self {
    let prompt_compressor = config
        .enable_compression
        .then(|| PromptCompressor::new(config.compression_config.clone()));

    Self {
        config,
        prompt_compressor,
    }
}
/// Renders the project structure as a tree.
///
/// If `only_directories_when_files_more_than` is configured and the project
/// has more files than that threshold, only directories are rendered;
/// otherwise the full tree is produced.
pub fn format_project_structure(&self, structure: &ProjectStructure) -> String {
    match self.config.only_directories_when_files_more_than {
        Some(files_limit) if structure.total_files > files_limit => {
            ProjectStructureFormatter::format_as_directory_tree(structure)
        }
        _ => ProjectStructureFormatter::format_as_tree(structure),
    }
}
/// Renders the most important code insights as a numbered Markdown list.
///
/// Insights are ordered by descending `importance_score` (scores that cannot
/// be compared are treated as equal) and at most `code_insights_limit`
/// entries are emitted. Each entry shows the file path, purpose type and
/// score; the detailed description is added when non-empty, and the source
/// summary is added inside a fenced code block when `include_source_code`
/// is enabled.
pub fn format_code_insights(&self, insights: &[CodeInsight]) -> String {
    let config = &self.config;

    let mut ranked: Vec<&CodeInsight> = insights.iter().collect();
    ranked.sort_by(|a, b| {
        b.code_dossier
            .importance_score
            .partial_cmp(&a.code_dossier.importance_score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let mut content = String::from("### Source Code Insights Summary\n");
    for (index, insight) in ranked.iter().take(config.code_insights_limit).enumerate() {
        let dossier = &insight.code_dossier;
        content.push_str(&format!(
            "{}. File `{}`, purpose type is `{}`, importance: {:.2}\n",
            index + 1,
            dossier.file_path.to_string_lossy(),
            dossier.code_purpose,
            dossier.importance_score
        ));
        if !insight.detailed_description.is_empty() {
            content.push_str(&format!(
                " Detailed description: {}\n",
                insight.detailed_description
            ));
        }
        if config.include_source_code {
            // The fence must be closed; otherwise every following entry is
            // rendered as part of this entry's code block.
            content.push_str(&format!(
                " Source code details: ```code\n{}\n```\n\n",
                dossier.source_summary
            ));
        }
    }
    content.push('\n');
    content
}
/// Wraps the README text in a titled Markdown section.
///
/// When `readme_truncate_length` is `Some(limit)` and the README is longer
/// than `limit` bytes, the text is cut and `...(truncated)` is appended. The
/// cut position is moved back to the nearest UTF-8 character boundary so
/// that multi-byte text (e.g. CJK or emoji) never causes a slicing panic;
/// the kept prefix is therefore at most `limit` bytes long.
pub fn format_readme_content(&self, readme: &str) -> String {
    let content = match self.config.readme_truncate_length {
        Some(limit) if readme.len() > limit => {
            // `limit` is a byte offset and may fall inside a character.
            // Index 0 is always a boundary, so the loop terminates.
            let mut end = limit;
            while !readme.is_char_boundary(end) {
                end -= 1;
            }
            format!("{}...(truncated)", &readme[..end])
        }
        _ => readme.to_string(),
    };
    format!(
        "### Previous README Content (Manually entered information, may not be accurate, for reference only)\n{}\n\n",
        content
    )
}
/// Renders the core dependencies as `from -> to (type)` lines.
///
/// Relations are listed from highest to lowest priority (see
/// `get_dependency_priority`); relations of equal priority keep their
/// original relative order. At most `dependency_limit` lines are emitted.
pub fn format_dependency_analysis(&self, deps: &RelationshipAnalysis) -> String {
    let mut ordered: Vec<_> = deps.core_dependencies.iter().collect();
    // `sort_by_key` is stable, matching the original comparator-based sort.
    ordered.sort_by_key(|dep| {
        std::cmp::Reverse(self.get_dependency_priority(&dep.dependency_type))
    });

    let mut content = String::from("### Dependency Relationship Analysis\n");
    for dep in ordered.into_iter().take(self.config.dependency_limit) {
        let line = format!(
            "{} -> {} ({})\n",
            dep.from,
            dep.to,
            dep.dependency_type.as_str()
        );
        content.push_str(&line);
    }
    content.push('\n');
    content
}
/// Last-resort size reduction used when prompt compression fails.
///
/// Keeps roughly the first 20% of the bytes for `"Code Insights"` content
/// and 40% for everything else. Content that is no more than 100 bytes
/// longer than that target is returned unchanged. Otherwise the text is cut
/// at the target, preferably at the last line break before it (when that
/// break lies beyond the first 100 bytes), and a truncation notice is
/// appended.
///
/// All lengths are measured in bytes and the cut is moved back to a UTF-8
/// character boundary, so multi-byte text is truncated to the intended size
/// and slicing can never panic. This function currently always returns `Ok`.
fn emergency_truncate(&self, content: &str, content_type: &str) -> Result<String> {
    let truncate_ratio = if content_type == "Code Insights" {
        0.2
    } else {
        0.4
    };
    let target_len = (content.len() as f64 * truncate_ratio) as usize;
    if content.len() <= target_len + 100 {
        return Ok(content.to_string());
    }

    // `target_len` is a byte offset below `content.len()`; step back until it
    // does not split a character (offset 0 is always a valid boundary).
    let mut cut = target_len;
    while !content.is_char_boundary(cut) {
        cut -= 1;
    }
    let truncated = &content[..cut];

    // Prefer ending on a complete line unless that would keep almost nothing.
    let kept = match truncated.rfind('\n') {
        Some(line_end) if line_end > 100 => &truncated[..line_end],
        _ => truncated,
    };
    let result = format!(
        "{}\n\n[Content truncated due to size limitations]",
        kept
    );

    println!(" 🚨 Emergency truncation for [{}]: reduced from {} to {} characters",
        content_type, content.len(), result.len());
    Ok(result)
}
/// Maps a dependency type to its display priority; larger values are listed
/// first by `format_dependency_analysis`.
fn get_dependency_priority(
    &self,
    dep_type: &crate::types::code_releationship::DependencyType,
) -> u8 {
    use crate::types::code_releationship::DependencyType as Kind;

    // Arms are listed from highest to lowest priority.
    match dep_type {
        Kind::Import => 10,
        Kind::Inheritance => 9,
        Kind::FunctionCall => 8,
        Kind::Composition => 7,
        Kind::DataFlow => 6,
        Kind::Module => 5,
    }
}
/// Renders earlier research results as one Markdown subsection per entry.
///
/// Each subsection is headed by the map key and contains the value as
/// pretty-printed JSON (an empty string if the value cannot be serialized).
/// Entries are emitted in ascending key order: `HashMap` iteration order is
/// unspecified, and sorting keeps the generated prompt identical across runs
/// for identical input.
pub fn format_research_results(&self, results: &HashMap<String, serde_json::Value>) -> String {
    let mut entries: Vec<(&String, &serde_json::Value)> = results.iter().collect();
    // Keys are unique, so an unstable sort gives a fully determined order.
    entries.sort_unstable_by(|(left, _), (right, _)| left.cmp(right));

    let mut content = String::from("### Existing Research Results\n");
    for (key, value) in entries {
        content.push_str(&format!(
            "#### {}:\n{}\n\n",
            key,
            serde_json::to_string_pretty(value).unwrap_or_default()
        ));
    }
    content
}
/// Runs `content` through the prompt compressor when one is configured.
///
/// * Without a compressor the content is returned unchanged.
/// * On success the compressor's output is returned; a summary line is
///   printed when the content was actually compressed.
/// * If the compressor fails, a warning is printed and the result of
///   `emergency_truncate` is returned instead.
///
/// `content_type` is a label passed to the compressor and used in log output.
pub async fn compress_content_if_needed(
    &self,
    context: &GeneratorContext,
    content: &str,
    content_type: &str,
) -> Result<String> {
    let compressor = match &self.prompt_compressor {
        Some(compressor) => compressor,
        None => return Ok(content.to_string()),
    };

    let outcome = compressor
        .compress_if_needed(context, content, content_type)
        .await;

    match outcome {
        Ok(compressed) => {
            if compressed.was_compressed {
                println!(" 📊 {}", compressed.compression_summary);
            }
            Ok(compressed.compressed_content)
        }
        Err(err) => {
            println!(" ⚠️ Compression failed for [{}]: {}, attempting emergency truncation", content_type, err);
            self.emergency_truncate(content, content_type)
        }
    }
}
}
/// Assembles the system and user prompts for an agent from a
/// `PromptTemplate` and a list of data sources.
pub struct GeneratorPromptBuilder {
// Static prompt text and settings supplied by the agent.
template: PromptTemplate,
// Formatter built from `template.formatter_config`.
formatter: DataFormatter,
}
impl GeneratorPromptBuilder {
/// Creates a builder for `template`, with a [`DataFormatter`] configured from
/// a copy of the template's `formatter_config`.
pub fn new(template: PromptTemplate) -> Self {
    // Field expressions run in order: the config is cloned before `template`
    // is moved into the struct.
    Self {
        formatter: DataFormatter::new(template.formatter_config.clone()),
        template,
    }
}
/// Builds the `(system_prompt, user_prompt)` pair for one agent run.
///
/// The system prompt is the template's system prompt, unchanged. The user
/// prompt is assembled from the template's instructions, the optional
/// `custom_content`, and the rendered `data_sources`. `include_timestamp`
/// adds a time-information section, and `agent_filter` is forwarded when
/// loading external knowledge.
///
/// # Errors
/// Returns any error raised while building the user prompt.
pub async fn build_prompts(
    &self,
    context: &GeneratorContext,
    data_sources: &[DataSource],
    custom_content: Option<String>,
    include_timestamp: bool,
    agent_filter: Option<&str>,
) -> Result<(String, String)> {
    let user_prompt = self
        .build_standard_user_prompt(
            context,
            data_sources,
            custom_content,
            include_timestamp,
            agent_filter,
        )
        .await?;

    Ok((self.template.system_prompt.clone(), user_prompt))
}
/// Assembles the user prompt.
///
/// Layout, in order: opening instruction, optional time-information section
/// (containing the raw `__CURRENT_UTC_TIME__` / `__CURRENT_TIMESTAMP__`
/// placeholders), the "Research Materials Reference" heading, optional custom
/// content, one section per available data source in the order given, all
/// research results grouped into a single section, and finally the closing
/// instruction. Every data section, and then the complete prompt, is passed
/// through `compress_content_if_needed`.
///
/// Data sources that are not available in the context are silently skipped;
/// memory keys this builder does not know how to format are ignored.
///
/// # Errors
/// Propagates errors returned by `compress_content_if_needed`.
async fn build_standard_user_prompt(
&self,
context: &GeneratorContext,
data_sources: &[DataSource],
custom_content: Option<String>,
include_timestamp: bool,
agent_filter: Option<&str>,
) -> Result<String> {
let mut prompt = String::new();
prompt.push_str(&self.template.opening_instruction);
prompt.push_str("\n\n");
// The placeholders are inserted verbatim here; they are not substituted
// within this function.
if include_timestamp {
prompt.push_str(
"## Current Time Information\nGeneration time: __CURRENT_UTC_TIME__\nTimestamp: __CURRENT_TIMESTAMP__\n\n"
);
}
prompt.push_str("## Research Materials Reference\n");
if let Some(custom) = custom_content {
prompt.push_str(&custom);
prompt.push_str("\n");
}
// Research results are collected first and rendered together after the loop.
let mut research_results = HashMap::new();
for source in data_sources {
match source {
// Memory-backed sources: dispatch on the key to pick the stored type
// and the matching formatter.
DataSource::MemoryData { scope, key } => match *key {
ScopedKeys::PROJECT_STRUCTURE => {
if let Some(structure) = context
.get_from_memory::<ProjectStructure>(scope, key)
.await
{
let formatted = self.formatter.format_project_structure(&structure);
let compressed = self
.formatter
.compress_content_if_needed(context, &formatted, "Project Structure")
.await?;
prompt.push_str(&compressed);
}
}
ScopedKeys::CODE_INSIGHTS => {
if let Some(insights) = context
.get_from_memory::<Vec<CodeInsight>>(scope, key)
.await
{
let formatted = self.formatter.format_code_insights(&insights);
let compressed = self
.formatter
.compress_content_if_needed(context, &formatted, "Code Insights")
.await?;
prompt.push_str(&compressed);
}
}
ScopedKeys::ORIGINAL_DOCUMENT => {
if let Some(readme) = context.get_from_memory::<String>(scope, key).await {
let formatted = self.formatter.format_readme_content(&readme);
let compressed = self
.formatter
.compress_content_if_needed(context, &formatted, "README Document")
.await?;
prompt.push_str(&compressed);
}
}
ScopedKeys::RELATIONSHIPS => {
if let Some(deps) = context
.get_from_memory::<RelationshipAnalysis>(scope, key)
.await
{
let formatted = self.formatter.format_dependency_analysis(&deps);
let compressed = self
.formatter
.compress_content_if_needed(context, &formatted, "Dependencies")
.await?;
prompt.push_str(&compressed);
}
}
// Any other memory key has no formatter here and contributes nothing.
_ => {}
},
DataSource::ResearchResult(agent_type) => {
if let Some(result) = context.get_research(agent_type).await {
research_results.insert(agent_type.clone(), result);
}
}
DataSource::ExternalKnowledgeByCategory(categories) => {
let category_refs: Vec<&str> = categories.iter().map(|s| s.as_str()).collect();
if let Some(knowledge) = context
.load_external_knowledge_by_categories(&category_refs, agent_filter)
.await
{
let cat_names = categories.join(", ");
let formatted = format!("### External Knowledge ({})\n{}\n\n", cat_names, knowledge);
let compressed = self
.formatter
.compress_content_if_needed(context, &formatted, &format!("Knowledge: {}", cat_names))
.await?;
prompt.push_str(&compressed);
}
}
}
}
if !research_results.is_empty() {
let formatted = self.formatter.format_research_results(&research_results);
let compressed = self
.formatter
.compress_content_if_needed(context, &formatted, "Research Results")
.await?;
prompt.push_str(&compressed);
}
prompt.push_str(&self.template.closing_instruction);
// Final pass over the whole prompt, in addition to the per-section passes.
self.formatter
.compress_content_if_needed(context, &prompt, "StepForwardAgent_prompt_full")
.await
}
}
/// An agent that gathers its inputs from the generator context, asks the LLM
/// once, stores the answer in memory and returns it as a typed value.
///
/// Implementors describe *what* the agent needs (`data_config`,
/// `prompt_template`) and where its result lives (`memory_scope_key`,
/// `agent_type`); the provided [`StepForwardAgent::execute`] drives the run.
#[async_trait]
pub trait StepForwardAgent: Send + Sync {
    /// Type of the agent's result. For the text-producing call modes the LLM
    /// answer is a JSON string, so `Output` must be deserializable from one.
    type Output: JsonSchema + for<'a> Deserialize<'a> + Serialize + Send + Sync + 'static;

    /// Identifier of the agent. Used as the memory key of the stored result,
    /// as part of the cache scope, and as the filter when loading external
    /// knowledge.
    fn agent_type(&self) -> String;

    /// Optional enum form of the agent type; when present its localized
    /// display name is used in log output instead of `agent_type`.
    fn agent_type_enum(&self) -> Option<crate::generator::research::types::AgentType> {
        None
    }

    /// Memory scope under which the result is stored; also the prefix of the
    /// cache scope.
    fn memory_scope_key(&self) -> String;

    /// Required and optional data sources of the agent.
    fn data_config(&self) -> AgentDataConfig;

    /// Prompt texts, LLM call mode and formatting settings of the agent.
    fn prompt_template(&self) -> PromptTemplate;

    /// Hook invoked with the typed result after it has been stored.
    /// The default does nothing.
    fn post_process(&self, _result: &Self::Output, _context: &GeneratorContext) -> Result<()> {
        Ok(())
    }

    /// Extra text placed at the start of the research-material section of the
    /// user prompt. The default provides none.
    async fn provide_custom_prompt_content(&self, _context: &GeneratorContext) -> Result<Option<String>> {
        Ok(None)
    }

    /// Whether the user prompt should contain the time-information section.
    /// The default is `false`.
    fn should_include_timestamp(&self) -> bool {
        false
    }

    /// Runs the agent once.
    ///
    /// Steps: verify that all required memory entries and research results
    /// exist, build the prompts (with the target-language instruction
    /// appended to both), call the LLM according to the template's call
    /// mode, store the result in memory under
    /// `memory_scope_key()` / `agent_type()`, run `post_process`, and return
    /// the typed result.
    ///
    /// # Errors
    /// Fails when a required data source is missing, when prompt building,
    /// the LLM call, storing or `post_process` fails, or when the LLM result
    /// cannot be converted into `Self::Output` (the error names the agent and
    /// carries the underlying deserialization error).
    async fn execute(&self, context: &GeneratorContext) -> Result<Self::Output> {
        let config = self.data_config();
        let agent_type_value = self.agent_type();

        // Required external knowledge is not verified up front.
        for source in &config.required_sources {
            match source {
                DataSource::MemoryData { scope, key } => {
                    if !context.has_memory_data(scope, key).await {
                        return Err(anyhow!("Required data source {}:{} is not available", scope, key));
                    }
                }
                DataSource::ResearchResult(agent_type) => {
                    if context.get_research(agent_type).await.is_none() {
                        return Err(anyhow!("Required research result {} is not available", agent_type));
                    }
                }
                DataSource::ExternalKnowledgeByCategory(_) => {}
            }
        }

        let all_sources = [config.required_sources, config.optional_sources].concat();
        let template = self.prompt_template();
        let language_instruction = context.config.target_language.prompt_instruction();
        let prompt_builder = GeneratorPromptBuilder::new(template.clone());
        let custom_content = self.provide_custom_prompt_content(context).await?;
        let include_timestamp = self.should_include_timestamp();
        let (system_prompt, user_prompt) = prompt_builder
            .build_prompts(
                context,
                &all_sources,
                custom_content,
                include_timestamp,
                Some(agent_type_value.as_str()),
            )
            .await?;
        let system_prompt = format!("{}\n\n{}", system_prompt, language_instruction);
        let user_prompt = format!("{}\n\n{}", user_prompt, language_instruction);

        // Human-readable agent name, shared by the LLM log tag, the
        // completion message and error reporting.
        let display_name = match self.agent_type_enum() {
            Some(agent_enum) => agent_enum.display_name(&context.config.target_language),
            None => agent_type_value.clone(),
        };
        let scope_key = self.memory_scope_key();

        let params = AgentExecuteParams {
            prompt_sys: system_prompt,
            prompt_user: user_prompt,
            cache_scope: format!("{}/{}", scope_key, agent_type_value.as_str()),
            log_tag: display_name.clone(),
        };

        let result_value = match template.llm_call_mode {
            LLMCallMode::Extract => {
                let result: Self::Output = extract(context, params).await?;
                serde_json::to_value(&result)?
            }
            LLMCallMode::Prompt => {
                let result_text: String = prompt(context, params).await?;
                let processed_text = replace_time_placeholders(&result_text);
                serde_json::to_value(&processed_text)?
            }
            LLMCallMode::PromptWithTools => {
                let result_text: String = prompt_with_tools(context, params).await?;
                let processed_text = replace_time_placeholders(&result_text);
                serde_json::to_value(&processed_text)?
            }
        };

        context
            .store_to_memory(&scope_key, agent_type_value.as_str(), result_value.clone())
            .await?;

        // Report why the conversion failed instead of an empty error message.
        let typed_result = serde_json::from_value::<Self::Output>(result_value).map_err(|err| {
            anyhow!(
                "Sub-Agent [{}] result could not be deserialized into its output type: {}",
                display_name,
                err
            )
        })?;

        self.post_process(&typed_result, context)?;
        println!("✅ Sub-Agent [{}] execution completed", display_name);
        Ok(typed_result)
    }
}