use anyhow::Result;
use serde_json::{json, Value};
use tracing::debug;
use crate::config::Config;
use crate::indexer::search::{
search_codebase_with_details_multi_query_text, search_codebase_with_details_text,
};
use crate::indexer::{extract_file_signatures, render_signatures_text, NoindexWalker, PathUtils};
use crate::mcp::types::{McpError, McpTool};
use octolib::embedding::constants::MAX_QUERIES;
/// Provider behind the `semantic_search` and `view_signatures` MCP tools.
#[derive(Clone)]
pub struct SemanticCodeProvider {
    /// Application configuration; supplies the default similarity threshold
    /// and is handed to the search functions.
    config: Config,
    /// Directory that searches are executed in and that the signature file
    /// walker is rooted at.
    working_directory: std::path::PathBuf,
}
impl SemanticCodeProvider {
pub fn new(config: Config, working_directory: std::path::PathBuf) -> Self {
Self {
config,
working_directory,
}
}
/// Returns the MCP tool descriptor (name, description and JSON input schema)
/// for the `semantic_search` tool.
///
/// The array form of `query` advertises `MAX_QUERIES` as its `maxItems`, the
/// same constant `execute_search` enforces, so the published schema and the
/// runtime check cannot drift apart.
pub fn get_tool_definition() -> McpTool {
    McpTool {
        name: "semantic_search".to_string(),
        description: "Search codebase by meaning — finds code by what it does, not exact symbol names. Prefer an array of related terms over a single query for broader coverage.".to_string(),
        input_schema: json!({
            "type": "object",
            "properties": {
                "query": {
                    // NOTE(review): the schema advertises minLength 10 while
                    // execute_search only rejects queries shorter than 3
                    // characters — confirm which bound is intended.
                    "oneOf": [
                        {
                            "type": "string",
                            "description": "Descriptive phrase about what the code does (not a symbol name)",
                            "minLength": 10,
                            "maxLength": 500
                        },
                        {
                            "type": "array",
                            "items": {
                                "type": "string",
                                "minLength": 10,
                                "maxLength": 500
                            },
                            "minItems": 1,
                            "maxItems": MAX_QUERIES,
                            "description": "Array of related search terms — finds all related code in one call"
                        }
                    ],
                    "description": "String or array of strings describing functionality to find. Array preferred for comprehensive results."
                },
                "mode": {
                    "type": "string",
                    "description": "Content type filter: 'code' (functions/classes), 'text' (plain text), 'docs' (markdown/README), 'all' (default)",
                    "enum": ["code", "text", "docs", "all"],
                    "default": "all"
                },
                "detail_level": {
                    "type": "string",
                    "description": "Result verbosity: 'signatures' (declarations only), 'partial' (truncated, default), 'full' (complete bodies)",
                    "enum": ["signatures", "partial", "full"],
                    "default": "partial"
                },
                "max_results": {
                    "type": "integer",
                    "description": "Max results to return (default: 3)",
                    "minimum": 1,
                    "maximum": 20,
                    "default": 3
                },
                "threshold": {
                    "type": "number",
                    "description": "Similarity cutoff 0.0–1.0 (higher = stricter match)",
                    "minimum": 0.0,
                    "maximum": 1.0
                },
                "language": {
                    "type": "string",
                    "description": "Filter code results by language (rust, python, typescript, go, etc.)"
                },
            },
            "required": ["query"],
            "additionalProperties": false
        }),
    }
}
/// Returns the MCP tool descriptor for the `view_signatures` tool.
///
/// The input schema accepts a single required `files` property: an array of
/// 1 to 100 strings, each a file path or glob pattern.
pub fn get_view_signatures_tool_definition() -> McpTool {
    // Schema of the one accepted argument, assembled separately for readability.
    let files_schema = json!({
        "type": "array",
        "description": "File paths or glob patterns (e.g. 'src/main.rs', '**/*.py', 'src/**/*.ts')",
        "items": { "type": "string" },
        "minItems": 1,
        "maxItems": 100
    });

    let input_schema = json!({
        "type": "object",
        "properties": {
            "files": files_schema
        },
        "required": ["files"],
        "additionalProperties": false
    });

    McpTool {
        name: String::from("view_signatures"),
        description: String::from("Extract function signatures, class definitions, and declarations from files without implementation bodies. Supports Rust, JS/TS, Python, Go, C++, PHP, Ruby, Bash, JSON, CSS, Svelte, Markdown."),
        input_schema,
    }
}
pub async fn execute_search(&self, arguments: &Value) -> Result<String, McpError> {
let queries: Vec<String> = match arguments.get("query") {
Some(Value::String(s)) => vec![s.clone()],
Some(Value::Array(arr)) => {
let queries: Vec<String> = arr
.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect();
if queries.is_empty() {
return Err(McpError::invalid_params(
"Invalid query array: must contain at least one non-empty string",
"semantic_search",
));
}
queries
}
_ => {
return Err(McpError::invalid_params(
"Missing required parameter 'query': must be a string or array of strings describing what to search for",
"semantic_search"
));
}
};
if queries.len() > MAX_QUERIES {
return Err(McpError::invalid_params(
format!("Too many queries: maximum {} queries allowed, got {}. Use fewer, more specific terms.", MAX_QUERIES, queries.len()),
"semantic_search"
));
}
for (i, query) in queries.iter().enumerate() {
let clean_query = String::from_utf8_lossy(query.as_bytes()).to_string();
let query = clean_query.trim();
if query.len() < 3 {
return Err(McpError::invalid_params(
format!(
"Invalid query {}: must be at least 3 characters long",
i + 1
),
"semantic_search",
));
}
if query.len() > 500 {
return Err(McpError::invalid_params(
format!(
"Invalid query {}: must be no more than 500 characters long",
i + 1
),
"semantic_search",
));
}
if query.is_empty() {
return Err(McpError::invalid_params(
format!(
"Invalid query {}: cannot be empty or whitespace only",
i + 1
),
"semantic_search",
));
}
}
let mode = arguments
.get("mode")
.and_then(|v| v.as_str())
.unwrap_or("all");
if !["code", "text", "docs", "all"].contains(&mode) {
return Err(McpError::invalid_params(
format!(
"Invalid mode '{}': must be one of 'code', 'text', 'docs', or 'all'",
mode
),
"semantic_search",
));
}
let detail_level = arguments
.get("detail_level")
.and_then(|v| v.as_str())
.unwrap_or("partial");
if !["signatures", "partial", "full"].contains(&detail_level) {
return Err(McpError::invalid_params(
format!(
"Invalid detail_level '{}': must be one of 'signatures', 'partial', or 'full'",
detail_level
),
"semantic_search",
));
}
let max_results = arguments
.get("max_results")
.and_then(|v| v.as_u64())
.unwrap_or(3) as usize;
if !(1..=20).contains(&max_results) {
return Err(McpError::invalid_params(
format!(
"Invalid max_results '{}': must be between 1 and 20",
max_results
),
"semantic_search",
));
}
let similarity_threshold = arguments
.get("threshold")
.and_then(|v| v.as_f64())
.map(|v| v as f32)
.unwrap_or(self.config.search.similarity_threshold);
if !(0.0..=1.0).contains(&similarity_threshold) {
return Err(McpError::invalid_params(
format!(
"Invalid similarity threshold '{}': must be between 0.0 and 1.0",
similarity_threshold
),
"semantic_search",
));
}
let language_filter = if let Some(language_value) = arguments.get("language") {
let language = language_value.as_str().ok_or_else(|| {
McpError::invalid_params(
"Invalid language parameter: must be a string",
"semantic_search",
)
})?;
use crate::indexer::languages;
if languages::get_language(language).is_none() {
return Err(McpError::invalid_params(
format!("Invalid language '{}': supported languages are rust, javascript, typescript, python, go, cpp, php, bash, ruby, json, svelte, css", language),
"semantic_search"
));
}
Some(language.to_string())
} else {
None
};
debug!(
queries = ?queries,
mode = %mode,
detail_level = %detail_level,
max_results = %max_results,
similarity_threshold = %similarity_threshold,
language_filter = ?language_filter,
working_directory = %self.working_directory.display(),
"Executing semantic code search with {} queries",
queries.len()
);
let original_dir = match std::env::current_dir() {
Ok(dir) => dir,
Err(e) => {
return Err(McpError::internal_error(
format!("Failed to get current directory: {}", e),
"semantic_search",
));
}
};
if let Err(e) = std::env::set_current_dir(&self.working_directory) {
return Err(McpError::internal_error(
format!(
"Failed to change to working directory '{}': {}",
self.working_directory.display(),
e
),
"semantic_search",
)
.with_details(format!("Path: {}", self.working_directory.display())));
}
let results = if queries.len() == 1 {
search_codebase_with_details_text(
&queries[0],
mode,
detail_level,
max_results,
similarity_threshold,
language_filter.as_deref(),
&self.config,
)
.await
} else {
search_codebase_with_details_multi_query_text(
&queries,
mode,
detail_level,
max_results,
similarity_threshold,
language_filter.as_deref(),
&self.config,
)
.await
};
if let Err(e) = std::env::set_current_dir(&original_dir) {
debug!(
error = %e,
original_dir = %original_dir.display(),
"Failed to restore original directory"
);
}
match results {
Ok(output) => Ok(output),
Err(e) => Err(McpError::internal_error(
format!("Search operation failed: {}", e),
"semantic_search",
)),
}
}
/// Executes the `view_signatures` tool.
///
/// Validates the `files` argument (an array of 1 to 100 non-empty string
/// patterns, each at most 500 characters and free of `..` path components),
/// compiles every pattern as a glob, walks `self.working_directory`, and
/// renders the signatures of every regular file whose relative path matches
/// at least one pattern.
///
/// Matched files are gathered in a sorted set so the list handed to the
/// signature extractor is deduplicated and has a stable order across runs.
///
/// # Errors
///
/// Returns `McpError::invalid_params` when `files` is missing or malformed or
/// a pattern is not a valid glob, and `McpError::internal_error` when
/// signature extraction fails.
pub async fn execute_view_signatures(&self, arguments: &Value) -> Result<String, McpError> {
    let files_array = arguments
        .get("files")
        .and_then(|v| v.as_array())
        .ok_or_else(|| McpError::invalid_params("Missing required parameter 'files': must be an array of file paths or glob patterns", "view_signatures"))?;

    if files_array.is_empty() {
        return Err(McpError::invalid_params(
            "Invalid files parameter: array must contain at least one file path or pattern",
            "view_signatures",
        ));
    }
    if files_array.len() > 100 {
        return Err(McpError::invalid_params(
            "Invalid files parameter: array must contain no more than 100 patterns",
            "view_signatures",
        ));
    }

    let mut file_patterns = Vec::with_capacity(files_array.len());
    for file_value in files_array {
        let pattern = file_value
            .as_str()
            .ok_or_else(|| {
                McpError::invalid_params(
                    "Invalid file pattern: all items in files array must be strings",
                    "view_signatures",
                )
            })?
            .trim();

        if pattern.is_empty() {
            return Err(McpError::invalid_params(
                "Invalid file pattern: patterns cannot be empty",
                "view_signatures",
            ));
        }

        // The limit is expressed in characters, so count chars, not bytes.
        if pattern.chars().count() > 500 {
            return Err(McpError::invalid_params(
                format!(
                    "Invalid file pattern '{}': must be no more than 500 characters long",
                    pattern
                ),
                "view_signatures",
            ));
        }

        // Reject any `..` path component (with either separator). Checking
        // whole components also catches a bare `..` or a trailing `/..`, and
        // no longer trips on names that merely end in two dots.
        let has_parent_component = pattern
            .split(|c: char| c == '/' || c == '\\')
            .any(|component| component == "..");
        if has_parent_component {
            return Err(McpError::invalid_params(
                format!(
                    "Invalid file pattern '{}': path traversal not allowed",
                    pattern
                ),
                "view_signatures",
            ));
        }

        file_patterns.push(pattern.to_string());
    }

    debug!(
        file_patterns = ?file_patterns,
        working_directory = %self.working_directory.display(),
        "Executing view_signatures"
    );

    // Compile every glob up front so an invalid pattern fails before the walk.
    let compiled_patterns = file_patterns
        .iter()
        .map(|pattern| {
            globset::Glob::new(pattern)
                .map(|glob| glob.compile_matcher())
                .map_err(|e| {
                    McpError::invalid_params(
                        format!("Invalid glob pattern '{}': {}", pattern, e),
                        "view_signatures",
                    )
                })
        })
        .collect::<Result<Vec<_>, _>>()?;

    // Sorted set: deduplicates and keeps the output order deterministic.
    let mut matching_files = std::collections::BTreeSet::new();

    let walker = NoindexWalker::create_walker(&self.working_directory).build();
    for result in walker {
        // Entries that cannot be read are skipped (best effort).
        let Ok(entry) = result else {
            continue;
        };
        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
            continue;
        }

        let relative_path =
            PathUtils::to_relative_string(entry.path(), &self.working_directory);
        if compiled_patterns
            .iter()
            .any(|matcher| matcher.is_match(&relative_path))
        {
            matching_files.insert(entry.path().to_path_buf());
        }
    }

    let matching_files: Vec<_> = matching_files.into_iter().collect();
    if matching_files.is_empty() {
        return Ok("No matching files found for the specified patterns.".to_string());
    }

    let signatures = extract_file_signatures(&matching_files).map_err(|e| {
        McpError::internal_error(
            format!("Failed to extract signatures: {}", e),
            "view_signatures",
        )
    })?;

    Ok(render_signatures_text(&signatures))
}
}