// octocode 0.11.0

// Copyright 2025 Muvon Un Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use anyhow::Result;
use serde_json::{json, Value};
use tracing::debug;

use crate::config::Config;
use crate::embedding::truncate_output;
use crate::indexer::{self, graphrag::GraphRAG};
use crate::mcp::types::{McpError, McpTool};

/// Operation requested from the GraphRAG tool.
///
/// Each variant corresponds to one value of the tool's `operation` argument.
#[derive(Debug, Clone)]
pub enum GraphRAGOperation {
	/// `"search"`: look up nodes matching a free-text query.
	Search,
	/// `"get-node"`: show the details of a single node.
	GetNode,
	/// `"get-relationships"`: list relationships where a node is the source or the target.
	GetRelationships,
	/// `"find-path"`: find paths between a source node and a target node.
	FindPath,
	/// `"overview"`: report node/relationship counts grouped by type.
	Overview,
}

/// Rendering style for GraphRAG results.
#[derive(Debug, Clone)]
pub enum OutputFormat {
	/// Compact plain text.
	Text,
	/// Pretty-printed JSON.
	Json,
	/// Markdown.
	Md,
	/// Command-line output.
	Cli,
}

impl OutputFormat {
	/// Returns `true` only for [`OutputFormat::Json`].
	pub fn is_json(&self) -> bool {
		matches!(self, Self::Json)
	}

	/// Returns `true` only for [`OutputFormat::Md`].
	pub fn is_md(&self) -> bool {
		matches!(self, Self::Md)
	}

	/// Returns `true` only for [`OutputFormat::Text`].
	pub fn is_text(&self) -> bool {
		matches!(self, Self::Text)
	}

	/// Returns `true` only for [`OutputFormat::Cli`].
	pub fn is_cli(&self) -> bool {
		matches!(self, Self::Cli)
	}
}

/// Parsed arguments for a single GraphRAG operation.
///
/// Only the identifiers relevant to `operation` are expected to be `Some`;
/// the others are left as `None`.
#[derive(Debug, Clone)]
pub struct GraphRAGArgs {
	/// Which operation to run.
	pub operation: GraphRAGOperation,
	/// Search text; set for the `Search` operation.
	pub query: Option<String>,
	/// Node identifier; set for `GetNode` and `GetRelationships`.
	pub node_id: Option<String>,
	/// Start node identifier; set for `FindPath`.
	pub source_id: Option<String>,
	/// End node identifier; set for `FindPath`.
	pub target_id: Option<String>,
	/// Depth limit handed to path finding.
	pub max_depth: usize,
	/// How the result should be rendered.
	pub format: OutputFormat,
}

/// GraphRAG tool provider
///
/// Exposes the `graphrag` MCP tool: it publishes the tool definition and
/// executes tool calls against the knowledge graph.
#[derive(Clone)]
pub struct GraphRagProvider {
	/// GraphRAG instance built from the configuration; its config is read
	/// again whenever an operation is executed.
	graphrag: GraphRAG,
	/// Directory the process switches into while an operation runs.
	working_directory: std::path::PathBuf,
}

impl GraphRagProvider {
	/// Create a provider for `working_directory`.
	///
	/// Returns `None` when `config.graphrag.enabled` is false, so callers can
	/// skip registering the tool altogether.
	pub fn new(config: Config, working_directory: std::path::PathBuf) -> Option<Self> {
		// Nothing to provide when the feature is switched off
		if !config.graphrag.enabled {
			return None;
		}

		let graphrag = GraphRAG::new(config);
		Some(Self {
			graphrag,
			working_directory,
		})
	}

	/// Build the MCP descriptor for the `graphrag` tool.
	///
	/// The descriptor carries the tool name, a description aimed at the
	/// calling model, and the JSON Schema of the accepted arguments. Only
	/// `operation` is required by the schema.
	pub fn get_tool_definition() -> McpTool {
		let name = String::from("graphrag");
		let description = String::from("Advanced relationship-aware GraphRAG operations for code analysis. Supports multiple operations: 'search' (find nodes by semantic query - excellent for file discovery by description), 'get-node' (detailed node info), 'get-relationships' (node connections), 'find-path' (connection paths between nodes), 'overview' (graph statistics). USE THIS TOOL for complex architectural queries about component interactions, data flows, dependency relationships, cross-cutting concerns, and finding files by their purpose/description. For simple code searches use semantic_search instead.");

		// One schema entry per accepted argument
		let properties = json!({
			"operation": {
				"type": "string",
				"enum": ["search", "get-node", "get-relationships", "find-path", "overview"],
				"description": "GraphRAG operation to perform: 'search' (semantic node search), 'get-node' (detailed node information), 'get-relationships' (node connections), 'find-path' (paths between nodes), 'overview' (graph statistics)"
			},
			"query": {
				"type": "string",
				"description": "Search query for 'search' operation. Complex architectural queries about code relationships, dependencies, or system interactions. Examples: 'How does user authentication flow through the system?', 'What components depend on the database layer?', 'Show me the data flow for order processing'",
				"minLength": 10,
				"maxLength": 1000
			},
			"node_id": {
				"type": "string",
				"description": "Node identifier for 'get-node' and 'get-relationships' operations. Format: 'path/to/file' or 'path/to/file/symbol'"
			},
			"source_id": {
				"type": "string",
				"description": "Source node identifier for 'find-path' operation. Format: 'path/to/file' or 'path/to/file/symbol'"
			},
			"target_id": {
				"type": "string",
				"description": "Target node identifier for 'find-path' operation. Format: 'path/to/file' or 'path/to/file/symbol'"
			},
			"max_depth": {
				"type": "integer",
				"description": "Maximum path depth for 'find-path' operation (default: 3)",
				"minimum": 1,
				"maximum": 10,
				"default": 3
			},
			"format": {
				"type": "string",
				"enum": ["text", "json", "markdown"],
				"description": "Output format (default: 'text' for token efficiency)",
				"default": "text"
			},
			"max_tokens": {
				"type": "integer",
				"description": "Maximum tokens allowed in output before truncation (default: 2000, set to 0 for unlimited)",
				"minimum": 0,
				"default": 2000
			}
		});

		// Top-level object schema wrapping the per-argument entries
		let input_schema = json!({
			"type": "object",
			"properties": properties,
			"required": ["operation"],
			"additionalProperties": false
		});

		McpTool {
			name,
			description,
			input_schema,
		}
	}

	/// Execute the graphrag tool with any operation
	///
	/// Parses and validates the MCP `arguments` object, runs the requested
	/// operation with the process working directory temporarily switched to
	/// `self.working_directory`, and returns the rendered output after passing
	/// it through `truncate_output` with the `max_tokens` limit.
	///
	/// Defaults applied when an optional argument is absent: `max_depth` = 3,
	/// `format` = `"text"`, `max_tokens` = 2000.
	///
	/// # Errors
	///
	/// Returns an invalid-params [`McpError`] when `operation` is missing or
	/// unknown, when a parameter required by the operation is missing, when
	/// `query` is shorter than 10 or longer than 1000 characters, or when
	/// `format` is not one of `text`, `json`, `markdown`. Returns an
	/// internal-error [`McpError`] when the working directory cannot be read,
	/// changed or restored, or when the operation itself fails.
	pub async fn execute(&self, arguments: &Value) -> Result<String, McpError> {
		// Parse and validate operation
		let operation_str = arguments
			.get("operation")
			.and_then(|v| v.as_str())
			.ok_or_else(|| McpError::invalid_params("Missing required parameter 'operation': must be one of 'search', 'get-node', 'get-relationships', 'find-path', 'overview'", "graphrag"))?;

		let operation = match operation_str {
			"search" => GraphRAGOperation::Search,
			"get-node" => GraphRAGOperation::GetNode,
			"get-relationships" => GraphRAGOperation::GetRelationships,
			"find-path" => GraphRAGOperation::FindPath,
			"overview" => GraphRAGOperation::Overview,
			_ => return Err(McpError::invalid_params(
				format!("Invalid operation '{}': must be one of 'search', 'get-node', 'get-relationships', 'find-path', 'overview'", operation_str),
				"graphrag"
			))
		};

		// Validate operation-specific parameters
		let (query, node_id, source_id, target_id) = match operation {
			GraphRAGOperation::Search => {
				let query = arguments
					.get("query")
					.and_then(|v| v.as_str())
					.ok_or_else(|| McpError::invalid_params("Missing required parameter 'query' for search operation: must be a detailed question about code relationships or architecture", "graphrag"))?;

				// The schema's minLength/maxLength and the messages below are
				// expressed in characters, so count characters rather than
				// UTF-8 bytes; otherwise multi-byte queries are mis-validated.
				let query_chars = query.chars().count();
				if query_chars < 10 {
					return Err(McpError::invalid_params("Invalid query: must be at least 10 characters long and describe relationships or architecture", "graphrag"));
				}
				if query_chars > 1000 {
					return Err(McpError::invalid_params(
						"Invalid query: must be no more than 1000 characters long",
						"graphrag",
					));
				}

				(Some(query.to_string()), None, None, None)
			}
			GraphRAGOperation::GetNode | GraphRAGOperation::GetRelationships => {
				let node_id = arguments
					.get("node_id")
					.and_then(|v| v.as_str())
					.ok_or_else(|| McpError::invalid_params(
						format!("Missing required parameter 'node_id' for {} operation: must be a valid node identifier", operation_str),
						"graphrag"
					))?;

				(None, Some(node_id.to_string()), None, None)
			}
			GraphRAGOperation::FindPath => {
				let source_id = arguments
					.get("source_id")
					.and_then(|v| v.as_str())
					.ok_or_else(|| McpError::invalid_params("Missing required parameter 'source_id' for find-path operation: must be a valid node identifier", "graphrag"))?;

				let target_id = arguments
					.get("target_id")
					.and_then(|v| v.as_str())
					.ok_or_else(|| McpError::invalid_params("Missing required parameter 'target_id' for find-path operation: must be a valid node identifier", "graphrag"))?;

				(
					None,
					None,
					Some(source_id.to_string()),
					Some(target_id.to_string()),
				)
			}
			GraphRAGOperation::Overview => (None, None, None, None),
		};

		// Parse optional parameters
		let max_depth = arguments
			.get("max_depth")
			.and_then(|v| v.as_u64())
			.unwrap_or(3) as usize;

		let format_str = arguments
			.get("format")
			.and_then(|v| v.as_str())
			.unwrap_or("text");

		let format = match format_str {
			"text" => OutputFormat::Text,
			"json" => OutputFormat::Json,
			"markdown" => OutputFormat::Md,
			_ => {
				return Err(McpError::invalid_params(
					format!(
						"Invalid format '{}': must be one of 'text', 'json', 'markdown'",
						format_str
					),
					"graphrag",
				))
			}
		};

		let max_tokens = arguments
			.get("max_tokens")
			.and_then(|v| v.as_u64())
			.unwrap_or(2000) as usize;

		// Create GraphRAGArgs structure for reusing CLI logic
		let args = GraphRAGArgs {
			operation,
			query,
			node_id,
			source_id,
			target_id,
			max_depth,
			format,
		};

		// Use structured logging for MCP protocol compliance
		debug!(
			operation = %operation_str,
			working_directory = %self.working_directory.display(),
			"Executing GraphRAG operation"
		);

		// Change to the working directory for the operation.
		// NOTE: the current directory is process-wide state, so calls running
		// concurrently can observe each other's directory changes.
		let original_dir = std::env::current_dir().map_err(|e| {
			McpError::internal_error(
				format!("Failed to get current directory: {}", e),
				"graphrag",
			)
		})?;
		std::env::set_current_dir(&self.working_directory).map_err(|e| {
			McpError::internal_error(format!("Failed to change directory: {}", e), "graphrag")
		})?;

		// Run the operation but hold on to its outcome: the directory must be
		// restored on the failure path too, otherwise a failed call would
		// leave the whole process inside `working_directory`.
		let outcome = self.execute_graphrag_operation(&args).await;

		// Restore original directory before propagating any error
		let restored = std::env::set_current_dir(&original_dir);

		let result = outcome.map_err(|e| {
			McpError::internal_error(format!("GraphRAG operation failed: {}", e), "graphrag")
		})?;
		restored.map_err(|e| {
			McpError::internal_error(format!("Failed to restore directory: {}", e), "graphrag")
		})?;

		// Apply token truncation if needed
		Ok(truncate_output(&result, max_tokens))
	}

	/// Execute GraphRAG operation using CLI logic with MCP-optimized output
	///
	/// Loads the knowledge graph through `indexer::GraphBuilder`, runs the
	/// operation described by `args` and renders the result in `args.format`.
	/// JSON and Markdown have dedicated renderings; `Text` and `Cli` both use
	/// the compact plain-text rendering.
	///
	/// # Errors
	///
	/// Fails when GraphRAG is disabled in the configuration, when the graph
	/// cannot be initialised or loaded, when the graph is empty, when a
	/// parameter required by the operation is absent from `args`, when a
	/// requested node does not exist, or when search, path finding or JSON
	/// serialization fails.
	async fn execute_graphrag_operation(&self, args: &GraphRAGArgs) -> Result<String> {
		// Check if GraphRAG is enabled (this should always be true since we're created conditionally)
		let config = self.graphrag.config();
		if !config.graphrag.enabled {
			return Err(anyhow::anyhow!("GraphRAG is not enabled in configuration"));
		}

		// Initialize the GraphBuilder
		let graph_builder = indexer::GraphBuilder::new_with_quiet(config.clone(), true)
			.await
			.map_err(|e| anyhow::anyhow!("Failed to initialize GraphRAG system: {}", e))?;

		// Get the current graph
		let graph = graph_builder
			.get_graph()
			.await
			.map_err(|e| anyhow::anyhow!("Failed to load GraphRAG knowledge graph: {}", e))?;

		// Check if graph is empty
		if graph.nodes.is_empty() {
			return Err(anyhow::anyhow!("GraphRAG knowledge graph is empty. Run 'octocode index' to build the knowledge graph."));
		}

		// Execute the requested operation and capture output
		match args.operation {
			GraphRAGOperation::Search => {
				// Normally validated by the caller; report an error instead of panicking if not
				let query = args
					.query
					.as_ref()
					.ok_or_else(|| anyhow::anyhow!("Missing 'query' for search operation"))?;
				let nodes = graph_builder
					.search_nodes(query)
					.await
					.map_err(|e| anyhow::anyhow!("Search failed: {}", e))?;

				// Render based on format
				match args.format {
					OutputFormat::Json => serde_json::to_string_pretty(&nodes)
						.map_err(|e| anyhow::anyhow!("JSON serialization failed: {}", e)),
					OutputFormat::Md => Ok(indexer::graphrag::graphrag_nodes_to_markdown(&nodes)),
					// Text format for token efficiency
					OutputFormat::Text | OutputFormat::Cli => {
						Ok(indexer::graphrag::graphrag_nodes_to_text(&nodes))
					}
				}
			}
			GraphRAGOperation::GetNode => {
				let node_id = args
					.node_id
					.as_ref()
					.ok_or_else(|| anyhow::anyhow!("Missing 'node_id' for get-node operation"))?;
				let node = graph
					.nodes
					.get(node_id)
					.ok_or_else(|| anyhow::anyhow!("Node not found: {}", node_id))?;

				match args.format {
					OutputFormat::Json => serde_json::to_string_pretty(node)
						.map_err(|e| anyhow::anyhow!("JSON serialization failed: {}", e)),
					OutputFormat::Md => {
						// One bullet per symbol
						let symbols = node
							.symbols
							.iter()
							.map(|s| format!("- {}", s))
							.collect::<Vec<_>>()
							.join("\n");
						Ok(format!(
							"# Node: {}\n\n**ID:** {}\n**Kind:** {}\n**Path:** {}\n**Description:** {}\n\n**Symbols:**\n{}\n",
							node.name, node.id, node.kind, node.path, node.description, symbols
						))
					}
					// Text format for token efficiency
					OutputFormat::Text | OutputFormat::Cli => Ok(format!(
						"Node: {}\nID: {}\nKind: {}\nPath: {}\nDescription: {}\nSymbols: {}\n",
						node.name,
						node.id,
						node.kind,
						node.path,
						node.description,
						node.symbols.join(", ")
					)),
				}
			}
			GraphRAGOperation::GetRelationships => {
				let node_id = args.node_id.as_ref().ok_or_else(|| {
					anyhow::anyhow!("Missing 'node_id' for get-relationships operation")
				})?;

				// Check if node exists
				if !graph.nodes.contains_key(node_id) {
					return Err(anyhow::anyhow!("Node not found: {}", node_id));
				}

				// Find relationships touching the node in either direction
				let relationships: Vec<_> = graph
					.relationships
					.iter()
					.filter(|rel| rel.source == *node_id || rel.target == *node_id)
					.collect();

				if relationships.is_empty() {
					return Ok(format!("No relationships found for node: {}", node_id));
				}

				if args.format.is_json() {
					return serde_json::to_string_pretty(&relationships)
						.map_err(|e| anyhow::anyhow!("JSON serialization failed: {}", e));
				}

				// Markdown and plain text share the same layout and differ only in decoration
				let markdown = args.format.is_md();

				// Display name of a node, falling back to the raw id for dangling references
				let display_name = |id: &str| -> String {
					graph
						.nodes
						.get(id)
						.map(|n| n.name.clone())
						.unwrap_or_else(|| id.to_string())
				};

				let mut output = if markdown {
					format!("# Relationships for {}\n\n", node_id)
				} else {
					format!(
						"Relationships for {} ({} total):\n\n",
						node_id,
						relationships.len()
					)
				};

				// Outgoing relationships
				let outgoing: Vec<_> = relationships
					.iter()
					.filter(|rel| rel.source == *node_id)
					.collect();
				if !outgoing.is_empty() {
					output.push_str(if markdown {
						"## Outgoing Relationships\n\n"
					} else {
						"Outgoing:\n"
					});
					for rel in outgoing {
						let target_name = display_name(rel.target.as_str());
						let line = if markdown {
							format!(
								"- **{}** → {} ({}): {}\n",
								rel.relation_type, target_name, rel.target, rel.description
							)
						} else {
							format!(
								"  {} → {} ({}): {}\n",
								rel.relation_type, target_name, rel.target, rel.description
							)
						};
						output.push_str(&line);
					}
					output.push('\n');
				}

				// Incoming relationships
				let incoming: Vec<_> = relationships
					.iter()
					.filter(|rel| rel.target == *node_id)
					.collect();
				if !incoming.is_empty() {
					output.push_str(if markdown {
						"## Incoming Relationships\n\n"
					} else {
						"Incoming:\n"
					});
					for rel in incoming {
						let source_name = display_name(rel.source.as_str());
						let line = if markdown {
							format!(
								"- **{}** ← {} ({}): {}\n",
								rel.relation_type, source_name, rel.source, rel.description
							)
						} else {
							format!(
								"  {} ← {} ({}): {}\n",
								rel.relation_type, source_name, rel.source, rel.description
							)
						};
						output.push_str(&line);
					}
				}
				Ok(output)
			}
			GraphRAGOperation::FindPath => {
				let source_id = args.source_id.as_ref().ok_or_else(|| {
					anyhow::anyhow!("Missing 'source_id' for find-path operation")
				})?;
				let target_id = args.target_id.as_ref().ok_or_else(|| {
					anyhow::anyhow!("Missing 'target_id' for find-path operation")
				})?;

				// Find paths
				let paths = graph_builder
					.find_paths(source_id, target_id, args.max_depth)
					.await
					.map_err(|e| anyhow::anyhow!("Path finding failed: {}", e))?;

				if paths.is_empty() {
					return Ok(format!(
						"No paths found between {} and {} within depth {}",
						source_id, target_id, args.max_depth
					));
				}

				// Display name of a node, falling back to the raw id for unknown nodes
				let display_name = |id: &str| -> String {
					graph
						.nodes
						.get(id)
						.map(|n| n.name.clone())
						.unwrap_or_else(|| id.to_string())
				};

				if args.format.is_json() {
					// Create structured path data
					let path_data: Vec<_> = paths
						.iter()
						.enumerate()
						.map(|(i, path)| {
							let nodes: Vec<_> = path
								.iter()
								.map(|node_id| {
									json!({"id": node_id, "name": display_name(node_id.as_str())})
								})
								.collect();
							json!({
								"path_index": i + 1,
								"nodes": nodes
							})
						})
						.collect();
					return serde_json::to_string_pretty(&path_data)
						.map_err(|e| anyhow::anyhow!("JSON serialization failed: {}", e));
				}

				// Markdown and plain text share the same layout and differ only in decoration
				let markdown = args.format.is_md();

				let mut output = if markdown {
					format!(
						"# Paths from {} to {}\n\nFound {} paths:\n\n",
						source_id,
						target_id,
						paths.len()
					)
				} else {
					format!(
						"Paths from {} to {} ({} found):\n\n",
						source_id,
						target_id,
						paths.len()
					)
				};

				for (i, path) in paths.iter().enumerate() {
					if markdown {
						output.push_str(&format!("## Path {}\n\n", i + 1));
					} else {
						output.push_str(&format!("Path {}:\n", i + 1));
					}

					for (j, node_id) in path.iter().enumerate() {
						if j > 0 {
							// Label the hop with the first relationship recorded
							// from the previous node to this one, if any
							let prev_id = &path[j - 1];
							let connector = graph
								.relationships
								.iter()
								.find(|r| r.source == *prev_id && r.target == *node_id)
								.map(|rel| format!(" --{}-> ", rel.relation_type))
								.unwrap_or_else(|| " -> ".to_string());
							output.push_str(&connector);
						}

						let node_name = display_name(node_id.as_str());
						if markdown {
							output.push_str(&format!("**{}** ({})", node_name, node_id));
						} else {
							output.push_str(&format!("{} ({})", node_name, node_id));
						}
					}
					output.push_str("\n\n");
				}
				Ok(output)
			}
			GraphRAGOperation::Overview => {
				// Get statistics
				let node_count = graph.nodes.len();
				let relationship_count = graph.relationships.len();

				// Count node types. An ordered map keeps the listing order
				// stable between calls; a hash map would shuffle the lines.
				let mut node_types = std::collections::BTreeMap::new();
				for node in graph.nodes.values() {
					*node_types.entry(node.kind.clone()).or_insert(0) += 1;
				}

				// Count relationship types (ordered for the same reason)
				let mut rel_types = std::collections::BTreeMap::new();
				for rel in &graph.relationships {
					*rel_types.entry(rel.relation_type.clone()).or_insert(0) += 1;
				}

				match args.format {
					OutputFormat::Json => {
						let overview = json!({
							"node_count": node_count,
							"relationship_count": relationship_count,
							"node_types": node_types,
							"relationship_types": rel_types
						});
						serde_json::to_string_pretty(&overview)
							.map_err(|e| anyhow::anyhow!("JSON serialization failed: {}", e))
					}
					OutputFormat::Md => {
						let mut output = format!("# GraphRAG Knowledge Graph Overview\n\nThe knowledge graph contains {} nodes and {} relationships.\n\n", node_count, relationship_count);

						output.push_str("## Node Types\n\n");
						for (kind, count) in node_types.iter() {
							output.push_str(&format!("- **{}**: {} nodes\n", kind, count));
						}

						output.push_str("\n## Relationship Types\n\n");
						for (rel_type, count) in rel_types.iter() {
							output.push_str(&format!(
								"- **{}**: {} relationships\n",
								rel_type, count
							));
						}
						Ok(output)
					}
					// Text format for token efficiency
					OutputFormat::Text | OutputFormat::Cli => {
						let mut output = format!(
							"GraphRAG Overview: {} nodes, {} relationships\n\n",
							node_count, relationship_count
						);

						output.push_str("Node Types:\n");
						for (kind, count) in node_types.iter() {
							output.push_str(&format!("  {}: {}\n", kind, count));
						}

						output.push_str("\nRelationship Types:\n");
						for (rel_type, count) in rel_types.iter() {
							output.push_str(&format!("  {}: {}\n", rel_type, count));
						}
						Ok(output)
					}
				}
			}
		}
	}
}