use crate::server::helpers::io_error_data;
use crate::server::types::{
GroupedKnownMatch, GroupedMatch, SearchCodebaseParams, SearchCodebaseResponse,
SearchResultGroup,
};
use crate::server::PathfinderServer;
use futures::StreamExt as _;
use pathfinder_common::types::FilterMode;
use pathfinder_search::{SearchMatch, SearchParams};
use pathfinder_treesitter::language::SupportedLanguage;
use rmcp::handler::server::wrapper::Json;
use rmcp::model::ErrorData;
use std::collections::HashMap;
use std::path::Path;
/// Maximum number of tree-sitter enrichment lookups kept in flight at once.
const ENRICHMENT_CONCURRENCY: usize = 32;
/// Per-match enrichment output: (optional "file::symbol" semantic path, node type).
type EnrichResult = (Option<String>, String);
impl PathfinderServer {
    /// Implements the `search_codebase` tool: runs a ripgrep search over the
    /// workspace, enriches each match with tree-sitter context (enclosing
    /// symbol + node type), applies the requested filter mode, marks/strips
    /// matches in files the client already knows, and optionally groups
    /// results per file.
    ///
    /// # Errors
    /// Returns an I/O-flavored `ErrorData` when the underlying ripgrep search
    /// fails.
    pub(crate) async fn search_codebase_impl(
        &self,
        params: SearchCodebaseParams,
    ) -> Result<Json<SearchCodebaseResponse>, ErrorData> {
        let start = std::time::Instant::now();
        tracing::info!(
            tool = "search_codebase",
            query = %params.query,
            is_regex = params.is_regex,
            path_glob = %params.path_glob,
            exclude_glob = %params.exclude_glob,
            known_files_count = params.known_files.len(),
            group_by_file = params.group_by_file,
            filter_mode = ?params.filter_mode,
            "search_codebase: start"
        );
        let search_params = SearchParams {
            workspace_root: self.workspace_root.path().to_path_buf(),
            query: params.query.clone(),
            is_regex: params.is_regex,
            path_glob: params.path_glob.clone(),
            exclude_glob: params.exclude_glob.clone(),
            max_results: params.max_results as usize,
            context_lines: params.context_lines as usize,
        };
        let ripgrep_start = std::time::Instant::now();
        match self.scout.search(&search_params).await {
            Ok(result) => {
                let ripgrep_ms = ripgrep_start.elapsed().as_millis();
                let mut enriched_matches = result.matches;
                let ts_start = std::time::Instant::now();
                let node_types = self.enrich_matches(&mut enriched_matches).await;
                let tree_sitter_parse_ms = ts_start.elapsed().as_millis();
                // The response is "degraded" when any match lives in a file
                // whose language tree-sitter cannot handle — enrichment fell
                // back to generic values for those matches.
                let degraded = enriched_matches
                    .iter()
                    .any(|m| SupportedLanguage::detect(Path::new(&m.file)).is_none());
                let degraded_reason = degraded.then(|| "unsupported_language".to_owned());
                // `node_types` is positionally parallel to `enriched_matches`;
                // filtering consumes both in lockstep.
                let filtered_matches =
                    apply_filter_mode(enriched_matches, &node_types, params.filter_mode);
                // Normalize known-file paths once so membership checks agree
                // with the normalized match paths below.
                let known_set: std::collections::HashSet<String> = params
                    .known_files
                    .iter()
                    .map(|p| normalize_path(p))
                    .collect();
                let file_groups = if params.group_by_file {
                    Some(build_file_groups(&filtered_matches, &known_set))
                } else {
                    None
                };
                // For files the client already has, strip content/context to
                // save tokens and flag the match as known.
                let flat_matches: Vec<SearchMatch> = filtered_matches
                    .into_iter()
                    .map(|mut m| {
                        if known_set.contains(&normalize_path(&m.file)) {
                            m.content = String::default();
                            m.context_before = vec![];
                            m.context_after = vec![];
                            m.known = Some(true);
                        }
                        m
                    })
                    .collect();
                let returned_count = flat_matches.len();
                let duration_ms = start.elapsed().as_millis();
                tracing::info!(
                    tool = "search_codebase",
                    total_matches = result.total_matches,
                    returned = returned_count,
                    truncated = result.truncated,
                    filter_mode = ?params.filter_mode,
                    ripgrep_ms,
                    tree_sitter_parse_ms,
                    duration_ms,
                    engines_used = ?["ripgrep", "treesitter"],
                    "search_codebase: complete"
                );
                Ok(Json(SearchCodebaseResponse {
                    matches: flat_matches,
                    total_matches: result.total_matches,
                    truncated: result.truncated,
                    file_groups,
                    degraded,
                    degraded_reason,
                }))
            }
            Err(err) => {
                let duration_ms = start.elapsed().as_millis();
                tracing::warn!(
                    tool = "search_codebase",
                    error = %err,
                    error_code = "INTERNAL_ERROR",
                    error_message = %err,
                    duration_ms,
                    engines_used = ?["ripgrep"],
                    "search_codebase: failed"
                );
                Err(io_error_data(err.to_string()))
            }
        }
    }

    /// Concurrently enriches `matches` in place with their enclosing semantic
    /// path and returns the tree-sitter node type for each match. The returned
    /// vector is positionally parallel to `matches` (callers rely on this for
    /// filter-mode decisions).
    async fn enrich_matches(&self, matches: &mut [SearchMatch]) -> Vec<String> {
        // Snapshot the fields we need so the futures don't borrow `matches`
        // while we later mutate it.
        let snapshots: Vec<(String, u64, u64)> = matches
            .iter()
            .map(|m| (m.file.clone(), m.line, m.column))
            .collect();
        let enrichment: Vec<EnrichResult> = futures::stream::iter(snapshots)
            .map(|(file, line_u64, column_u64)| async move {
                let file_path = Path::new(&file);
                let line = usize::try_from(line_u64).unwrap_or(usize::MAX);
                let column = usize::try_from(column_u64).unwrap_or(0);
                let symbol = self
                    .surgeon
                    .enclosing_symbol(self.workspace_root.path(), file_path, line)
                    .await
                    .ok()
                    .flatten()
                    .map(|s| format!("{file}::{s}"));
                // Fall back to the generic "code" node type when tree-sitter
                // cannot classify the position (e.g. unsupported language).
                let node_type = self
                    .surgeon
                    .node_type_at_position(self.workspace_root.path(), file_path, line, column)
                    .await
                    .unwrap_or_else(|_| "code".to_owned());
                (symbol, node_type)
            })
            // BUGFIX: `buffered`, not `buffer_unordered`. The positional `zip`
            // below assumes results arrive in input order; `buffer_unordered`
            // yields in completion order and would attribute enrichment data
            // (and the node types used for filtering) to the wrong matches.
            .buffered(ENRICHMENT_CONCURRENCY)
            .collect()
            .await;
        enrichment
            .into_iter()
            .zip(matches.iter_mut())
            .map(|((symbol, node_type), m)| {
                m.enclosing_semantic_path = symbol;
                node_type
            })
            .collect()
    }
}
/// Narrows `matches` according to `mode`, using the tree-sitter node type
/// computed for each match (`node_types` is positionally parallel to
/// `matches`).
fn apply_filter_mode(
    matches: Vec<SearchMatch>,
    node_types: &[String],
    mode: FilterMode,
) -> Vec<SearchMatch> {
    // `All` short-circuits; the other modes keep a match only when its node
    // type satisfies the mode's predicate.
    let keep: fn(&str) -> bool = match mode {
        FilterMode::All => return matches,
        FilterMode::CodeOnly => |t| t == "code",
        FilterMode::CommentsOnly => |t| t == "comment" || t == "string",
    };
    matches
        .into_iter()
        .zip(node_types)
        .filter_map(|(m, t)| keep(t).then_some(m))
        .collect()
}
/// Drops a single leading "./" so paths compare consistently regardless of
/// how the search engine reported them.
fn normalize_path(p: &str) -> String {
    match p.strip_prefix("./") {
        Some(rest) => rest.to_owned(),
        None => p.to_owned(),
    }
}
/// Groups flat matches by normalized file path, preserving first-seen file
/// order. Matches in files the client already knows (`known_set`, normalized
/// paths) are reduced to position-only `GroupedKnownMatch` entries; all
/// others keep their full content/context as `GroupedMatch`.
fn build_file_groups(
    matches: &[SearchMatch],
    known_set: &std::collections::HashSet<String>,
) -> Vec<SearchResultGroup> {
    // `order` remembers first-seen file order; `groups` holds the data.
    let mut order: Vec<String> = Vec::new();
    let mut groups: HashMap<String, SearchResultGroup> = HashMap::new();
    for m in matches {
        let key = normalize_path(&m.file);
        // Entry API: a single hash lookup per match instead of the
        // contains_key + insert + get_mut triple.
        let group = groups.entry(key.clone()).or_insert_with(|| {
            order.push(key.clone());
            SearchResultGroup {
                file: m.file.clone(),
                version_hash: m.version_hash.clone(),
                matches: Vec::new(),
                known_matches: Vec::new(),
            }
        });
        if known_set.contains(&key) {
            group.known_matches.push(GroupedKnownMatch {
                line: m.line,
                column: m.column,
                enclosing_semantic_path: m.enclosing_semantic_path.clone(),
                known: true,
            });
        } else {
            group.matches.push(GroupedMatch {
                line: m.line,
                column: m.column,
                content: m.content.clone(),
                context_before: m.context_before.clone(),
                context_after: m.context_after.clone(),
                enclosing_semantic_path: m.enclosing_semantic_path.clone(),
            });
        }
    }
    // Drain in first-seen order so the response is deterministic.
    order
        .into_iter()
        .filter_map(|k| groups.remove(&k))
        .collect()
}
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::server::PathfinderServer;
    use pathfinder_common::config::PathfinderConfig;
    use pathfinder_common::sandbox::Sandbox;
    use pathfinder_common::types::WorkspaceRoot;
    use pathfinder_search::RipgrepScout;
    use pathfinder_treesitter::mock::MockSurgeon;
    use std::sync::Arc;

    /// A match in a file with an extension no tree-sitter grammar covers
    /// (`.xyz`) must flip the response's `degraded` flag and set
    /// `degraded_reason` to "unsupported_language".
    #[tokio::test]
    async fn test_search_codebase_degraded_on_unsupported_language() {
        // Real temp workspace + real ripgrep scout; only the tree-sitter
        // surgeon is mocked.
        let ws_dir = tempfile::tempdir().unwrap();
        let ws = WorkspaceRoot::new(ws_dir.path()).unwrap();
        let config = PathfinderConfig::default();
        let sandbox = Sandbox::new(ws.path(), &config.sandbox);
        std::fs::create_dir_all(ws_dir.path().join("src")).unwrap();
        // One file containing the needle, with an unsupported extension.
        std::fs::write(ws_dir.path().join("src/data.xyz"), "findme content").unwrap();
        let scout = Arc::new(RipgrepScout::new());
        let surgeon = Arc::new(MockSurgeon::new());
        // Queue one canned result per surgeon call the single expected match
        // will trigger: no enclosing symbol, generic "code" node type.
        surgeon
            .enclosing_symbol_results
            .lock()
            .unwrap()
            .push(Ok(None));
        surgeon
            .node_type_at_position_results
            .lock()
            .unwrap()
            .push(Ok("code".to_string()));
        let lawyer = Arc::new(pathfinder_lsp::NoOpLawyer);
        let server =
            PathfinderServer::with_all_engines(ws, config, sandbox, scout, surgeon, lawyer);
        let params = SearchCodebaseParams {
            query: "findme".to_owned(),
            is_regex: false,
            path_glob: "**/*.xyz".to_owned(),
            exclude_glob: String::default(),
            max_results: 10,
            context_lines: 0,
            known_files: vec![],
            group_by_file: false,
            filter_mode: pathfinder_common::types::FilterMode::default(),
        };
        let result = server.search_codebase_impl(params).await;
        let response = result.expect("search should succeed");
        assert!(
            response.0.degraded,
            "should be degraded for unsupported language"
        );
        assert_eq!(
            response.0.degraded_reason.as_deref(),
            Some("unsupported_language")
        );
    }
}