deepwiki-rs 1.5.0

use crate::generator::preprocess::memory::{MemoryScope, ScopedKeys};
use crate::generator::research::types::{AgentType, BoundaryAnalysisReport};
use crate::generator::{
    context::GeneratorContext,
    step_forward_agent::{
        AgentDataConfig, DataSource, FormatterConfig, LLMCallMode, PromptTemplate, StepForwardAgent,
    },
};
use crate::types::code::{CodeInsight, CodePurpose};
use anyhow::{Result, anyhow};
use async_trait::async_trait;

/// Boundary Interface Analyzer - Responsible for analyzing the external call boundaries of the system, including CLI, API, configuration interfaces, etc.
#[derive(Default, Clone)]
pub struct BoundaryAnalyzer;

#[async_trait]
impl StepForwardAgent for BoundaryAnalyzer {
    type Output = BoundaryAnalysisReport;

    fn agent_type(&self) -> String {
        AgentType::BoundaryAnalyzer.to_string()
    }

    fn agent_type_enum(&self) -> Option<AgentType> {
        Some(AgentType::BoundaryAnalyzer)
    }

    fn memory_scope_key(&self) -> String {
        crate::generator::research::memory::MemoryScope::STUDIES_RESEARCH.to_string()
    }

    fn data_config(&self) -> AgentDataConfig {
        AgentDataConfig {
            required_sources: vec![
                DataSource::PROJECT_STRUCTURE,
                DataSource::DEPENDENCY_ANALYSIS,
                DataSource::ResearchResult(AgentType::SystemContextResearcher.to_string()),
            ],
            // Use API and deployment docs for boundary analysis
            optional_sources: vec![DataSource::knowledge_categories(vec!["api", "deployment"])],
        }
    }

    fn prompt_template(&self) -> PromptTemplate {
        PromptTemplate {
            system_prompt:
                r#"You are a professional system boundary interface analyst. Your task is to identify and analyze external call boundaries of software systems.

## What to Look For:

### CLI Commands (cli_boundaries)
Look in Entry-type files for:
- Command-line argument parsing (e.g., argparse, commander, clap, yargs)
- Main function parameters
- Process.argv usage
- Environment variable reading
- Configuration file loading
- Any program startup options

### API Interfaces (api_boundaries)  
Look in Api/Controller-type files for:
- HTTP route handlers
- REST endpoints
- GraphQL resolvers
- RPC method definitions
- Webhook handlers

### Router Routes (router_boundaries)
Look in Router-type files for:
- URL path definitions
- Route parameters
- Page routing logic
- Middleware chains

### Configuration (can be documented as CLI or Integration)
Look in Config-type files for:
- Configuration parameters
- Environment variables
- Feature flags
- Startup options

## Important:
- Even if code doesn't have explicit CLI/API definitions, extract what you can from entry points and config files
- Document how users interact with the system (command line, config files, etc.)
- If you find configuration parameters, document them as CLI boundaries or integration suggestions
- NEVER leave all arrays empty if you have Entry or Config code - at minimum document the startup/configuration interface

You MUST return a valid JSON object:
{
  "cli_boundaries": [...],
  "api_boundaries": [...],
  "router_boundaries": [...],
  "integration_suggestions": [...],
  "confidence_score": 0.0
}

Rules:
- Include all top-level keys
- Use empty arrays only if truly no boundaries exist
- confidence_score: 0.0-10.0"#
                    .to_string(),

            opening_instruction: "Analyze the system's boundary interfaces based on the following code:".to_string(),

            closing_instruction: r#"
## Analysis Instructions:
1. **Entry files**: Look for CLI arguments, environment variables, config loading - these ARE boundaries!
2. **Config files**: Document configuration parameters as CLI boundaries or integration suggestions
3. **No API/Router code?** That's fine - focus on CLI/configuration interfaces
4. **Minimum output**: If you have Entry/Config code, document at least the startup interface

DO NOT return all empty arrays if you have Entry or Config code to analyze!"#
                .to_string(),

            llm_call_mode: LLMCallMode::Extract,

            formatter_config: FormatterConfig::default(),
        }
    }

    /// Provide custom boundary code analysis content
    async fn provide_custom_prompt_content(
        &self,
        context: &GeneratorContext,
    ) -> Result<Option<String>> {
        // 1. Filter boundary-related code insights
        let boundary_insights = self.filter_boundary_code_insights(context).await?;

        if boundary_insights.is_empty() {
            return Ok(Some(
                "### Boundary-Related Code Insights\nNo obvious boundary interface-related code found.\n\n".to_string(),
            ));
        }

        // 2. Format boundary code insights
        let formatted_content = self.format_boundary_insights(&boundary_insights);

        Ok(Some(formatted_content))
    }

    /// Post-processing - output analysis summary
    fn post_process(
        &self,
        result: &BoundaryAnalysisReport,
        _context: &GeneratorContext,
    ) -> Result<()> {
        println!("✅ Boundary interface analysis completed:");
        println!("   - CLI commands: {} items", result.cli_boundaries.len());
        println!("   - API interfaces: {} items", result.api_boundaries.len());
        println!("   - Router routes: {} items", result.router_boundaries.len());
        println!("   - Integration suggestions: {} items", result.integration_suggestions.len());
        println!("   - Confidence: {:.1}/10", result.confidence_score);

        Ok(())
    }
}

impl BoundaryAnalyzer {
    /// Filter boundary-related code insights
    async fn filter_boundary_code_insights(
        &self,
        context: &GeneratorContext,
    ) -> Result<Vec<CodeInsight>> {
        let all_insights = context
            .get_from_memory::<Vec<CodeInsight>>(MemoryScope::PREPROCESS, ScopedKeys::CODE_INSIGHTS)
            .await
            .ok_or_else(|| anyhow!("CODE_INSIGHTS not found in PREPROCESS memory"))?;

        // Filter boundary-related code
        let boundary_insights: Vec<CodeInsight> = all_insights
            .into_iter()
            .filter(|insight| {
                matches!(
                    insight.code_dossier.code_purpose,
                    CodePurpose::Entry
                        | CodePurpose::Api
                        | CodePurpose::Config
                        | CodePurpose::Router
                        | CodePurpose::Controller
                )
            })
            .collect();

        // Sort by importance
        let mut sorted_insights = boundary_insights;
        sorted_insights.sort_by(|a, b| {
            b.code_dossier
                .importance_score
                .partial_cmp(&a.code_dossier.importance_score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        
        // Use configuration value for max boundary insights
        let max_insights = context.config.boundary_analysis.max_boundary_insights;
        sorted_insights.truncate(max_insights);

        // Group by type and count
        let mut entry_count = 0;
        let mut api_count = 0;
        let mut config_count = 0;
        let mut router_count = 0;

        for insight in &sorted_insights {
            match insight.code_dossier.code_purpose {
                CodePurpose::Entry => entry_count += 1,
                CodePurpose::Api => api_count += 1,
                CodePurpose::Config => config_count += 1,
                CodePurpose::Router => router_count += 1,
                CodePurpose::Controller => api_count += 1,
                _ => {}
            }
        }

        println!(
            "📊 Boundary code distribution: Entry({}) API/Controller({}) Config({}) Router({})",
            entry_count, api_count, config_count, router_count
        );

        Ok(sorted_insights)
    }

    /// Format boundary code insights - specialized formatting logic
    fn format_boundary_insights(&self, insights: &[CodeInsight]) -> String {
        let mut content = String::from("### Boundary-Related Code Insights\n");

        // Group by CodePurpose for display
        let mut entry_codes = Vec::new();
        let mut api_codes = Vec::new();
        let mut config_codes = Vec::new();
        let mut router_codes = Vec::new();

        for insight in insights {
            match insight.code_dossier.code_purpose {
                CodePurpose::Entry => entry_codes.push(insight),
                CodePurpose::Api => api_codes.push(insight),
                CodePurpose::Controller => api_codes.push(insight),
                CodePurpose::Config => config_codes.push(insight),
                CodePurpose::Router => router_codes.push(insight),
                _ => {}
            }
        }

        if !entry_codes.is_empty() {
            content.push_str("#### Entry Point Code (Entry)\n");
            content.push_str("These code usually contain CLI command definitions, main function entry points, etc.:\n\n");
            for insight in entry_codes {
                self.add_boundary_insight_item(&mut content, insight);
            }
        }

        if !api_codes.is_empty() {
            content.push_str("#### API/Controller Code (API/Controller)\n");
            content.push_str("These code usually contain HTTP endpoints, API routes, controller logic, etc.:\n\n");
            for insight in api_codes {
                self.add_boundary_insight_item(&mut content, insight);
            }
        }

        if !config_codes.is_empty() {
            content.push_str("#### Configuration-Related Code (Config)\n");
            content.push_str("These code usually contain configuration structures, parameter definitions, environment variables, etc.:\n\n");
            for insight in config_codes {
                self.add_boundary_insight_item(&mut content, insight);
            }
        }

        if !router_codes.is_empty() {
            content.push_str("#### Router-Related Code (Router)\n");
            content.push_str("These code usually contain route definitions, middleware, request handling, etc.:\n\n");
            for insight in router_codes {
                self.add_boundary_insight_item(&mut content, insight);
            }
        }

        content.push_str("\n");
        content
    }

    /// Add single boundary code insight item with full context
    fn add_boundary_insight_item(&self, content: &mut String, insight: &CodeInsight) {
        content.push_str(&format!(
            "**File**: `{}` (Importance: {:.2}, Purpose: {:?})\n",
            insight.code_dossier.file_path.to_string_lossy(),
            insight.code_dossier.importance_score,
            insight.code_dossier.code_purpose
        ));

        if !insight.detailed_description.is_empty() {
            content.push_str(&format!("- **Description**: {}\n", insight.detailed_description));
        }

        if !insight.responsibilities.is_empty() {
            content.push_str(&format!("- **Responsibilities**: {}\n", insight.responsibilities.join(", ")));
        }

        // Include detailed interface information for CLI/API extraction
        if !insight.interfaces.is_empty() {
            content.push_str("- **Interfaces/Functions**:\n");
            for interface in &insight.interfaces {
                content.push_str(&format!("  - `{}` ({})", interface.name, interface.interface_type));
                if !interface.parameters.is_empty() {
                    let params: Vec<String> = interface.parameters.iter()
                        .map(|p| format!("{}: {}", p.name, p.param_type))
                        .collect();
                    content.push_str(&format!("({})", params.join(", ")));
                }
                if let Some(ref ret) = interface.return_type {
                    content.push_str(&format!(" -> {}", ret));
                }
                content.push_str("\n");
            }
        }

        // Include dependencies for understanding external integrations
        if !insight.dependencies.is_empty() {
            content.push_str("- **Key Dependencies**: ");
            let dep_names: Vec<&str> = insight.dependencies.iter()
                .filter(|d| d.is_external)
                .map(|d| d.name.as_str())
                .take(10)
                .collect();
            content.push_str(&format!("{}\n", dep_names.join(", ")));
        }

        // Always include source summary for boundary analysis
        if !insight.code_dossier.source_summary.is_empty() {
            content.push_str(&format!(
                "- **Source Code**:\n```\n{}\n```\n",
                insight.code_dossier.source_summary
            ));
        }

        content.push_str("\n");
    }
}