use super::file_search_bridge::{self, FileSearchConfig};
use super::grep_cache::GrepSearchCache;
use anyhow::{Context, Result};
use serde_json::{self, Value};
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::OnceLock;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::thread;
use std::time::Duration;
use tokio::task::spawn_blocking;
use tracing::warn;
/// Default cap on the number of `match` entries kept for one search; used
/// wherever `GrepSearchInput::max_results` is `None`, both for ripgrep's
/// `--max-count` argument and for the post-processing cut in
/// `finalize_matches`.
const MAX_SEARCH_RESULTS: NonZeroUsize = NonZeroUsize::new(5).unwrap();
/// Process-wide memo of the ripgrep worker-thread count, filled on first use.
static OPTIMAL_SEARCH_THREADS: OnceLock<NonZeroUsize> = OnceLock::new();

/// Returns the thread count passed to ripgrep via `-j`.
///
/// The value is three quarters of the CPU count reported by `num_cpus`,
/// bounded to the range `2..=8`, and is computed only once per process.
fn optimal_search_threads() -> NonZeroUsize {
    let compute = || {
        let three_quarters = num_cpus::get() * 3 / 4;
        let bounded = three_quarters.clamp(2, 8);
        // `bounded` is at least 2, so the fallback arm is never taken in practice.
        match NonZeroUsize::new(bounded) {
            Some(threads) => threads,
            None => NonZeroUsize::new(2).unwrap(),
        }
    };
    *OPTIMAL_SEARCH_THREADS.get_or_init(compute)
}
/// Default budget (32 KiB) for the summed serialized size of the entries
/// returned from one search; applied by `GrepSearchInput::with_defaults`.
const DEFAULT_MAX_RESULT_BYTES: usize = 32 * 1024;
/// Time limit used by `perform_search` when the input carries no `timeout`.
const DEFAULT_SEARCH_TIMEOUT: Duration = Duration::from_secs(5);
/// Globs excluded from every ripgrep run (passed as `--glob !<pattern>`)
/// unless the input disables `respect_ignore_files`.
const DEFAULT_IGNORE_GLOBS: &[&str] = &[
"**/.git/**",
"**/node_modules/**",
"**/target/**",
"**/.cursor/**",
];
/// Delay between scheduling a search in `on_user_query` and starting it, so
/// that queries arriving in quick succession collapse into one search.
const SEARCH_DEBOUNCE: Duration = Duration::from_millis(150);
/// Sleep between checks while a scheduled search waits for the currently
/// active search to release its slot.
const ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL: Duration = Duration::from_millis(20);
use serde::{Deserialize, Serialize};
/// Parameters for a single grep-style search.
///
/// Every optional field left as `None` falls back to the default chosen by
/// the backend that consumes the input.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GrepSearchInput {
    /// Pattern handed to ripgrep (a regex unless `literal` is `Some(true)`).
    pub pattern: String,
    /// File or directory to search.
    pub path: String,
    /// `Some(true)` forces case-sensitive matching, `Some(false)` ignores
    /// case, `None` selects ripgrep's smart-case mode.
    pub case_sensitive: Option<bool>,
    /// When `Some(true)`, the pattern is matched as a fixed string.
    pub literal: Option<bool>,
    /// Extra `--glob` filter applied to candidate paths.
    pub glob_pattern: Option<String>,
    /// Number of context lines requested around each match.
    pub context_lines: Option<usize>,
    /// NOTE(review): not read by the ripgrep backend in this file, which
    /// consults `search_hidden` instead; confirm which flag callers rely on.
    pub include_hidden: Option<bool>,
    /// Upper bound on the number of `match` entries kept.
    pub max_results: Option<usize>,
    /// When `Some(false)`, ignore files are bypassed and the built-in and
    /// extra ignore globs are not applied. Defaults to `true`.
    pub respect_ignore_files: Option<bool>,
    /// Largest file size, in bytes, that ripgrep should search.
    pub max_file_size: Option<usize>,
    /// Whether hidden files are searched.
    pub search_hidden: Option<bool>,
    /// Whether binary files are searched.
    pub search_binary: Option<bool>,
    /// Whether only the names of matching files are requested.
    pub files_with_matches: Option<bool>,
    /// ripgrep file-type filter (`--type`).
    pub type_pattern: Option<String>,
    /// Whether non-matching lines are selected instead of matching ones.
    pub invert_match: Option<bool>,
    /// Whether the pattern must match at word boundaries.
    pub word_boundaries: Option<bool>,
    /// Whether line numbers are requested. Defaults to `true`.
    pub line_number: Option<bool>,
    /// Whether column numbers are requested.
    pub column: Option<bool>,
    /// Whether only the matched part of each line is requested.
    pub only_matching: Option<bool>,
    /// Whitespace-trimming toggle; how it is applied is up to the backend.
    pub trim: Option<bool>,
    /// Budget for the summed serialized size of the returned entries.
    pub max_result_bytes: Option<usize>,
    /// Time limit for `perform_search`.
    pub timeout: Option<Duration>,
    /// Additional globs to exclude on top of the built-in ignore list.
    pub extra_ignore_globs: Option<Vec<String>>,
}
impl GrepSearchInput {
    /// Creates an input for `pattern` under `path` with every option unset,
    /// leaving all defaults to the search backend.
    #[inline]
    pub fn new(pattern: String, path: String) -> Self {
        Self {
            pattern,
            path,
            // Matching behaviour.
            case_sensitive: None,
            literal: None,
            invert_match: None,
            word_boundaries: None,
            // File selection.
            glob_pattern: None,
            type_pattern: None,
            include_hidden: None,
            search_hidden: None,
            search_binary: None,
            respect_ignore_files: None,
            max_file_size: None,
            extra_ignore_globs: None,
            // Output shape.
            context_lines: None,
            files_with_matches: None,
            line_number: None,
            column: None,
            only_matching: None,
            trim: None,
            // Limits.
            max_results: None,
            max_result_bytes: None,
            timeout: None,
        }
    }

    /// Creates an input for `pattern` under `path` with the module defaults
    /// spelled out explicitly: case-sensitive regex search, ignore files
    /// respected, hidden and binary files skipped, line numbers on, and the
    /// default result-count, byte and time limits.
    ///
    /// Fields that have no default (`glob_pattern`, `context_lines`,
    /// `max_file_size`, `type_pattern`, `extra_ignore_globs`) stay `None`.
    #[inline]
    pub fn with_defaults(pattern: String, path: String) -> Self {
        Self {
            case_sensitive: Some(true),
            literal: Some(false),
            include_hidden: Some(false),
            max_results: Some(MAX_SEARCH_RESULTS.get()),
            respect_ignore_files: Some(true),
            search_hidden: Some(false),
            search_binary: Some(false),
            files_with_matches: Some(false),
            invert_match: Some(false),
            word_boundaries: Some(false),
            line_number: Some(true),
            column: Some(false),
            only_matching: Some(false),
            trim: Some(false),
            max_result_bytes: Some(DEFAULT_MAX_RESULT_BYTES),
            timeout: Some(DEFAULT_SEARCH_TIMEOUT),
            // Everything else keeps the unset value from `new`.
            ..Self::new(pattern, path)
        }
    }
}
/// Outcome of one search as handed back to callers and stored in the cache.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GrepSearchResult {
/// The pattern/query text this result was produced for.
pub query: String,
/// Parsed JSON records emitted by ripgrep (one `Value` per output line),
/// after the count and byte limits have been applied.
pub matches: Vec<Value>,
/// `true` when entries were dropped by the count or byte limit.
pub truncated: bool,
/// Number of `match` records seen before truncation; the code in this file
/// only fills it in when `truncated` is `true`. Missing on deserialization
/// is treated as `None`.
#[serde(default)]
pub total_matches: Option<usize>,
}
/// Coordinates debounced, cancellable ripgrep searches rooted at one
/// directory and caches their results.
pub struct GrepSearchManager {
// Shared with the background tasks spawned by `on_user_query`.
state: Arc<Mutex<SearchState>>,
// Directory that interactive searches and file enumeration run against.
search_dir: PathBuf,
// Result cache consulted before ripgrep is invoked.
cache: Arc<GrepSearchCache>,
}
/// Mutable bookkeeping for the interactive (debounced) search flow.
struct SearchState {
// Most recent query text passed to `on_user_query`.
latest_query: String,
// True from the moment a debounced task is spawned until it picks up
// `latest_query` and starts its search.
is_search_scheduled: bool,
// The search currently occupying the single "active" slot, if any.
active_search: Option<ActiveSearch>,
// Result of the most recent search that produced any entries.
last_result: Option<GrepSearchResult>,
}
/// Identity of the search that currently occupies the active slot.
struct ActiveSearch {
// Query text the search was started with; a newer query that does not
// start with this text cancels the search.
query: String,
// Set to `true` to ask the search to discard its result. Pointer identity
// of this `Arc` is also used to tell whether a finishing task still owns
// the active slot.
cancellation_token: Arc<AtomicBool>,
}
impl GrepSearchManager {
/// Creates a manager that searches under `search_dir`, starting with no
/// query, no active search, no stored result, and a fresh result cache
/// constructed with the argument `100`.
pub fn new(search_dir: PathBuf) -> Self {
    let initial_state = SearchState {
        latest_query: String::new(),
        is_search_scheduled: false,
        active_search: None,
        last_result: None,
    };
    let state = Arc::new(Mutex::new(initial_state));
    let cache = Arc::new(GrepSearchCache::new(100));
    Self {
        state,
        search_dir,
        cache,
    }
}
/// Looks `input` up in `cache` and, on a hit, returns a copy of the cached
/// entry as a `GrepSearchResult`. Returns `None` on a miss.
fn cached_result(cache: &GrepSearchCache, input: &GrepSearchInput) -> Option<GrepSearchResult> {
    let hit = cache.get(input)?;
    let query = hit.query.clone();
    let matches = hit.matches.clone();
    Some(GrepSearchResult {
        query,
        matches,
        truncated: hit.truncated,
        total_matches: hit.total_matches,
    })
}
pub fn on_user_query(&self, query: &str) {
{
let mut st = match self.state.lock() {
Ok(state) => state,
Err(err) => {
warn!("grep search state lock poisoned while handling query update: {err}");
return;
}
};
if query != st.latest_query {
st.latest_query.clear();
st.latest_query.push_str(query);
} else {
return;
}
if let Some(active_search) = &st.active_search
&& !query.starts_with(&active_search.query)
{
active_search
.cancellation_token
.store(true, Ordering::Relaxed);
st.active_search = None;
}
if !st.is_search_scheduled {
st.is_search_scheduled = true;
} else {
return;
}
}
let state = self.state.clone();
let search_dir = self.search_dir.clone();
let cache = self.cache.clone();
spawn_blocking(move || {
thread::sleep(SEARCH_DEBOUNCE);
loop {
let active_is_none = match state.lock() {
Ok(st) => st.active_search.is_none(),
Err(err) => {
warn!(
"grep search state lock poisoned while waiting for active search: {err}"
);
return;
}
};
if active_is_none {
break;
}
thread::sleep(ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL);
}
let cancellation_token = Arc::new(AtomicBool::new(false));
let token = cancellation_token.clone();
let query = {
let mut st = match state.lock() {
Ok(state) => state,
Err(err) => {
warn!(
"grep search state lock poisoned while preparing debounced search: {err}"
);
return;
}
};
let query = st.latest_query.clone();
st.is_search_scheduled = false;
st.active_search = Some(ActiveSearch {
query: query.clone(),
cancellation_token: token,
});
query
};
GrepSearchManager::spawn_grep_file(
query,
search_dir,
cancellation_token,
state,
Some(cache),
);
});
}
/// Returns a copy of the most recently stored search result, if any.
///
/// A poisoned state lock is logged and reported as `None`.
pub fn last_result(&self) -> Option<GrepSearchResult> {
    let guard = self
        .state
        .lock()
        .map_err(|err| warn!("grep search state lock poisoned while reading last result: {err}"))
        .ok()?;
    guard.last_result.clone()
}
/// Runs the search described by `input` and returns
/// `(entries, truncated, total_match_count)`.
///
/// Currently this only delegates to the ripgrep backend.
fn execute_with_backends(input: &GrepSearchInput) -> Result<(Vec<Value>, bool, usize)> {
Self::run_ripgrep_backend(input)
}
fn run_ripgrep_backend(input: &GrepSearchInput) -> Result<(Vec<Value>, bool, usize)> {
use std::process::Command;
let mut cmd = Command::new("rg");
cmd.arg("-j")
.arg(optimal_search_threads().get().to_string());
if !input.respect_ignore_files.unwrap_or(true) {
cmd.arg("--no-ignore");
}
if input.search_hidden.unwrap_or(false) {
cmd.arg("--hidden");
}
if input.search_binary.unwrap_or(false) {
cmd.arg("--binary");
}
if input.files_with_matches.unwrap_or(false) {
cmd.arg("--files-with-matches");
}
if let Some(type_pattern) = &input.type_pattern {
cmd.arg("--type").arg(type_pattern);
}
if let Some(max_file_size) = input.max_file_size {
cmd.arg("--max-filesize").arg(format!("{}B", max_file_size));
}
if let Some(case_sensitive) = input.case_sensitive {
if case_sensitive {
cmd.arg("--case-sensitive");
} else {
cmd.arg("--ignore-case");
}
} else {
cmd.arg("--smart-case");
}
if input.invert_match.unwrap_or(false) {
cmd.arg("--invert-match");
}
if input.word_boundaries.unwrap_or(false) {
cmd.arg("--word-regexp");
}
if input.line_number.unwrap_or(true) {
cmd.arg("--line-number");
} else {
cmd.arg("--no-line-number");
}
if input.column.unwrap_or(false) {
cmd.arg("--column");
}
if input.only_matching.unwrap_or(false) {
cmd.arg("--only-matching");
}
if input.trim.unwrap_or(false) {
}
if let Some(literal) = input.literal
&& literal
{
cmd.arg("--fixed-strings");
}
if let Some(glob_pattern) = &input.glob_pattern {
cmd.arg("--glob").arg(glob_pattern);
}
if input.respect_ignore_files.unwrap_or(true) {
for pattern in DEFAULT_IGNORE_GLOBS {
cmd.arg("--glob").arg(format!("!{}", pattern));
}
if let Some(extra) = &input.extra_ignore_globs {
for pattern in extra {
cmd.arg("--glob").arg(format!("!{}", pattern));
}
}
}
if let Some(context_lines) = input.context_lines {
cmd.arg("--context").arg(context_lines.to_string());
}
let max_results = input.max_results.unwrap_or(MAX_SEARCH_RESULTS.get());
cmd.arg("--max-count").arg(max_results.to_string());
cmd.arg("--json");
cmd.arg(&input.pattern);
cmd.arg(&input.path);
let output = cmd.output().with_context(|| {
format!("failed to execute ripgrep for pattern '{}'", input.pattern)
})?;
let output_str = String::from_utf8_lossy(&output.stdout);
let matches: Vec<Value> = output_str
.lines()
.filter_map(|line| serde_json::from_str::<Value>(line).ok())
.collect();
Ok(Self::finalize_matches(matches, input))
}
/// Applies the result-count and byte limits from `input` to the raw ripgrep
/// records in `matches`.
///
/// Returns `(kept_entries, truncated, total_match_count)` where
/// `total_match_count` is the number of records whose `"type"` is `"match"`
/// in the unmodified input.
///
/// * Count limit (`max_results`, default `MAX_SEARCH_RESULTS`): records are
///   kept up to and including the last allowed `match`, plus the run of
///   `context` records that directly follows it. `truncated` is set when at
///   least one `match` record was dropped this way.
/// * A count limit of `0` keeps nothing; `truncated` then reflects whether
///   any `match` record existed, consistent with the non-zero case.
/// * Byte limit (`max_result_bytes`): records are kept while the sum of
///   their serialized lengths stays within the limit; `truncated` is set
///   when any record is dropped by it.
fn finalize_matches(
    mut matches: Vec<Value>,
    input: &GrepSearchInput,
) -> (Vec<Value>, bool, usize) {
    /// True for ripgrep records of type `match`.
    fn is_match(entry: &Value) -> bool {
        entry.get("type").and_then(Value::as_str) == Some("match")
    }

    let max_results = input.max_results.unwrap_or(MAX_SEARCH_RESULTS.get());
    let total_match_count = matches.iter().filter(|entry| is_match(entry)).count();

    if max_results == 0 {
        return (Vec::new(), total_match_count > 0, total_match_count);
    }

    // Index one past the last record to keep: the `max_results`-th match
    // followed by its directly trailing context records. With fewer matches
    // than the limit, everything is kept.
    let cut_index = matches
        .iter()
        .enumerate()
        .filter(|(_, entry)| is_match(entry))
        .nth(max_results - 1)
        .map_or(matches.len(), |(last_kept, _)| {
            let trailing_context = matches[last_kept + 1..]
                .iter()
                .take_while(|entry| {
                    entry.get("type").and_then(Value::as_str) == Some("context")
                })
                .count();
            last_kept + 1 + trailing_context
        });

    // Every match beyond the `max_results`-th lies after `cut_index`.
    let mut truncated = total_match_count > max_results;
    matches.truncate(cut_index);

    if let Some(limit) = input.max_result_bytes {
        let mut used = 0usize;
        let within_budget = matches
            .iter()
            .take_while(|entry| {
                used = used.saturating_add(entry.to_string().len());
                used <= limit
            })
            .count();
        if within_budget < matches.len() {
            truncated = true;
            matches.truncate(within_budget);
        }
    }

    (matches, truncated, total_match_count)
}
fn spawn_grep_file(
query: String,
search_dir: PathBuf,
cancellation_token: Arc<AtomicBool>,
search_state: Arc<Mutex<SearchState>>,
cache: Option<Arc<GrepSearchCache>>,
) {
spawn_blocking(move || {
if cancellation_token.load(Ordering::Relaxed) {
{
let mut st = match search_state.lock() {
Ok(state) => state,
Err(err) => {
warn!("grep search state lock poisoned while cancelling search: {err}");
return;
}
};
if let Some(active_search) = &st.active_search
&& Arc::ptr_eq(&active_search.cancellation_token, &cancellation_token)
{
st.active_search = None;
}
}
return;
}
let input = GrepSearchInput::with_defaults(
query.clone(),
search_dir.to_string_lossy().into_owned(),
);
if let Some(ref cache) = cache
&& let Some(cached_result) = Self::cached_result(cache, &input)
{
let mut st = match search_state.lock() {
Ok(state) => state,
Err(err) => {
warn!("grep search state lock poisoned while loading cached result: {err}");
return;
}
};
st.last_result = Some(cached_result);
return;
}
let search_result = GrepSearchManager::execute_with_backends(&input);
let is_cancelled = cancellation_token.load(Ordering::Relaxed);
if !is_cancelled
&& let Ok((matches, truncated, total_match_count)) = search_result
&& !matches.is_empty()
{
let result = GrepSearchResult {
query,
matches,
truncated,
total_matches: if truncated {
Some(total_match_count)
} else {
None
},
};
if let Some(ref cache) = cache
&& GrepSearchCache::should_cache(&result)
{
cache.put(&input, result.clone());
}
let mut st = match search_state.lock() {
Ok(state) => state,
Err(err) => {
warn!("grep search state lock poisoned while storing search result: {err}");
return;
}
};
st.last_result = Some(result);
}
{
let mut st = match search_state.lock() {
Ok(state) => state,
Err(err) => {
warn!(
"grep search state lock poisoned while clearing active search: {err}"
);
return;
}
};
if let Some(active_search) = &st.active_search
&& Arc::ptr_eq(&active_search.cancellation_token, &cancellation_token)
{
st.active_search = None;
}
}
});
}
/// Runs the search described by `input` and returns its result.
///
/// A cached result for the same input is returned directly. Otherwise the
/// backend runs on the blocking thread pool, bounded by `input.timeout`
/// (default `DEFAULT_SEARCH_TIMEOUT`), and the result is cached when the
/// cache accepts it. `total_matches` is only filled in for truncated
/// results.
///
/// # Errors
///
/// Fails when the time limit elapses, when the worker task cannot be
/// joined, or when the backend itself reports an error.
pub async fn perform_search(&self, input: GrepSearchInput) -> Result<GrepSearchResult> {
    if let Some(hit) = Self::cached_result(&self.cache, &input) {
        return Ok(hit);
    }

    let deadline = input.timeout.unwrap_or(DEFAULT_SEARCH_TIMEOUT);
    let worker_input = input.clone();
    let worker =
        spawn_blocking(move || GrepSearchManager::execute_with_backends(&worker_input));

    let joined = tokio::time::timeout(deadline, worker)
        .await
        .context("ripgrep search timed out")?;
    let backend_outcome = joined.context("ripgrep search worker panicked")?;
    let (matches, truncated, total_match_count) = backend_outcome?;

    let result = GrepSearchResult {
        query: input.pattern.clone(),
        matches,
        truncated,
        total_matches: truncated.then_some(total_match_count),
    };

    if GrepSearchCache::should_cache(&result) {
        self.cache.put(&input, result.clone());
    }
    Ok(result)
}
/// Returns the paths of files under the manager's search directory that
/// the file-search bridge reports for `pattern`, limited to `max_results`
/// and honouring gitignore rules.
///
/// `cancel_flag` is handed to the bridge unchanged.
///
/// # Errors
///
/// Propagates any error returned by the file-search bridge.
pub fn enumerate_files_with_pattern(
    &self,
    pattern: String,
    max_results: usize,
    cancel_flag: Option<Arc<AtomicBool>>,
) -> Result<Vec<String>> {
    let root = self.search_dir.clone();
    let config = FileSearchConfig::new(pattern, root)
        .with_limit(max_results)
        .respect_gitignore(true);

    let outcome = file_search_bridge::search_files(config, cancel_flag)?;
    let paths: Vec<String> = file_search_bridge::file_matches_only(outcome.matches)
        .into_iter()
        .map(|found| found.path)
        .collect();
    Ok(paths)
}
/// Lists files under the manager's search directory using an empty search
/// pattern, limited to `max_results`, honouring gitignore rules and
/// skipping everything matched by `exclude_patterns`.
///
/// # Errors
///
/// Propagates any error returned by the file-search bridge.
pub fn list_all_files(
    &self,
    max_results: usize,
    exclude_patterns: Vec<String>,
) -> Result<Vec<String>> {
    let base = FileSearchConfig::new(String::new(), self.search_dir.clone())
        .with_limit(max_results)
        .respect_gitignore(true);
    let config = exclude_patterns
        .into_iter()
        .fold(base, |cfg, pattern| cfg.exclude(pattern));

    let outcome = file_search_bridge::search_files(config, None)?;
    let paths: Vec<String> = file_search_bridge::file_matches_only(outcome.matches)
        .into_iter()
        .map(|found| found.path)
        .collect();
    Ok(paths)
}
}
// Unit tests for result post-processing and the input/manager constructors.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// The byte budget keeps entries while their summed serialized size fits and
// flags truncation as soon as one entry has to be dropped.
#[test]
fn finalize_matches_respects_max_bytes() {
let mut input = GrepSearchInput::with_defaults("pat".into(), ".".into());
input.max_result_bytes = Some(100);
input.max_results = Some(5);
let matches = vec![json!({"text": "12345"}), json!({"text": "6789"})];
let (kept, truncated, _total) = GrepSearchManager::finalize_matches(matches, &input);
assert!(!truncated);
assert_eq!(kept.len(), 2);
// Budget too small for both entries: only the first one survives.
input.max_result_bytes = Some(20);
let matches = vec![json!({"text": "12345"}), json!({"text": "6789"})];
let (kept, truncated, _total) = GrepSearchManager::finalize_matches(matches, &input);
assert!(truncated);
assert_eq!(kept.len(), 1); }
// Only records of type "match" count against `max_results`; begin/context
// records are carried along, and the cut happens after the context that
// directly follows the last allowed match.
#[test]
fn finalize_matches_counts_only_match_type_entries() {
let mut input = GrepSearchInput::with_defaults("pat".into(), ".".into());
input.max_results = Some(2);
let matches = vec![
json!({"type": "begin", "data": {"path": {"text": "Cargo.lock"}}}),
json!({"type": "context", "data": {"line_number": 538, "lines": {"text": "ctx1"}}}),
json!({"type": "context", "data": {"line_number": 539, "lines": {"text": "ctx2"}}}),
json!({"type": "match", "data": {"line_number": 553, "lines": {"text": "match1"}}}),
json!({"type": "context", "data": {"line_number": 554, "lines": {"text": "ctx3"}}}),
json!({"type": "context", "data": {"line_number": 555, "lines": {"text": "ctx4"}}}),
json!({"type": "context", "data": {"line_number": 560, "lines": {"text": "ctx5"}}}),
json!({"type": "match", "data": {"line_number": 563, "lines": {"text": "match2"}}}),
json!({"type": "context", "data": {"line_number": 564, "lines": {"text": "ctx6"}}}),
json!({"type": "end", "data": {"path": {"text": "Cargo.lock"}}}),
];
let (kept, truncated, total) = GrepSearchManager::finalize_matches(matches, &input);
// Exactly two matches exist, so nothing relevant was dropped; the final
// "end" record falls behind the cut, leaving nine entries.
assert!(!truncated);
assert_eq!(kept.len(), 9);
assert_eq!(kept[3]["type"], "match");
assert_eq!(kept[7]["type"], "match");
assert_eq!(total, 2);
}
// With more matches than `max_results`, the surplus is dropped, truncation
// is reported, and the total still reflects every match in the input.
#[test]
fn finalize_matches_truncates_when_more_match_types_than_limit() {
let mut input = GrepSearchInput::with_defaults("pat".into(), ".".into());
input.max_results = Some(1);
let matches = vec![
json!({"type": "begin", "data": {"path": {"text": "f.txt"}}}),
json!({"type": "match", "data": {"line_number": 1, "lines": {"text": "m1"}}}),
json!({"type": "context", "data": {"line_number": 2, "lines": {"text": "c1"}}}),
json!({"type": "match", "data": {"line_number": 10, "lines": {"text": "m2"}}}),
json!({"type": "context", "data": {"line_number": 11, "lines": {"text": "c2"}}}),
];
let (kept, truncated, total) = GrepSearchManager::finalize_matches(matches, &input);
assert!(truncated);
assert_eq!(kept.len(), 3);
assert_eq!(kept[1]["type"], "match");
assert_eq!(kept[2]["type"], "context");
assert_eq!(total, 2); }
// The manager remembers the directory it was created with.
#[test]
fn test_grep_search_manager_creation() {
let manager = GrepSearchManager::new(PathBuf::from("."));
assert_eq!(manager.search_dir, PathBuf::from("."));
}
// `new` stores pattern and path and leaves options unset.
#[test]
fn test_grep_search_input_new() {
let input = GrepSearchInput::new("pattern".to_string(), "/path/to/search".to_string());
assert_eq!(input.pattern, "pattern");
assert_eq!(input.path, "/path/to/search");
assert!(input.case_sensitive.is_none());
}
// `with_defaults` fills in the explicit default values.
#[test]
fn test_grep_search_input_with_defaults() {
let input = GrepSearchInput::with_defaults("pattern".to_string(), "/path".to_string());
assert_eq!(input.pattern, "pattern");
assert_eq!(input.path, "/path");
assert_eq!(input.case_sensitive, Some(true));
assert_eq!(input.include_hidden, Some(false));
assert_eq!(input.max_results, Some(MAX_SEARCH_RESULTS.get()));
}
}