1use anyhow::Result;
5use fuzzy_matcher::skim::SkimMatcherV2;
6use fuzzy_matcher::FuzzyMatcher;
7use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
8use serde::{Deserialize, Serialize};
9use serde_json::Value;
10use std::collections::HashMap;
11use std::fs::{self, File};
12use std::io::{BufRead, BufReader};
13use std::path::{Path, PathBuf};
14use std::sync::mpsc::{channel, Receiver, Sender};
15use std::sync::{Arc, Mutex};
16use std::thread;
17use std::time::{Duration, Instant};
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct SearchConfig {
21 pub max_lines_per_file: usize, pub max_file_size_mb: u64, pub search_timeout_ms: u64, pub fuzzy_threshold: i64, pub smart_sampling: bool, pub watch_patterns: Vec<String>, }
28
29impl Default for SearchConfig {
30 fn default() -> Self {
31 Self {
32 max_lines_per_file: 1000,
33 max_file_size_mb: 50,
34 search_timeout_ms: 500,
35 fuzzy_threshold: 50,
36 smart_sampling: true,
37 watch_patterns: vec![
38 "*.json".to_string(),
39 "*.jsonl".to_string(),
40 "*.md".to_string(),
41 "*.log".to_string(),
42 "*.txt".to_string(),
43 ],
44 }
45 }
46}
47
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct SearchResult {
50 pub file_path: PathBuf,
51 pub line_number: usize,
52 pub content: String,
53 pub score: i64,
54 pub context: Vec<String>, pub file_type: String,
56 pub timestamp: std::time::SystemTime,
57}
58
/// Front end for a background search thread.
///
/// Requests are sent to the worker over an mpsc channel; the worker writes
/// its findings into `search_index`, which this handle reads back.
pub struct SmartBackgroundSearcher {
    /// Limits and watch patterns in effect for this searcher.
    config: SearchConfig,
    /// Per-file results shared with the worker thread.
    search_index: Arc<Mutex<HashMap<PathBuf, Vec<SearchResult>>>>,
    /// File-system watcher; `None` until `start_watching` installs one.
    watcher: Option<RecommendedWatcher>,
    /// Sending half of the channel the worker thread listens on.
    sender: Sender<SearchEvent>,
}
65
/// Messages consumed by the background worker loop.
enum SearchEvent {
    /// Scan each of `paths` for `query` and store the hits in the index.
    Search { query: String, paths: Vec<PathBuf> },
    /// The file at this path changed; its cached results are dropped.
    FileChanged(PathBuf),
    /// Ends the worker loop. Not sent anywhere in the code visible here.
    Stop,
}
71
72impl SmartBackgroundSearcher {
73 pub fn new(config: SearchConfig) -> Result<Self> {
74 let (sender, receiver) = channel();
75 let search_index = Arc::new(Mutex::new(HashMap::new()));
76
77 let index_clone = search_index.clone();
79 let config_clone = config.clone();
80
81 thread::spawn(move || {
82 Self::search_worker(receiver, index_clone, config_clone);
83 });
84
85 Ok(Self {
86 config,
87 search_index,
88 watcher: None,
89 sender,
90 })
91 }
92
93 fn search_worker(
94 receiver: Receiver<SearchEvent>,
95 index: Arc<Mutex<HashMap<PathBuf, Vec<SearchResult>>>>,
96 config: SearchConfig,
97 ) {
98 let fuzzy_matcher = SkimMatcherV2::default();
99
100 while let Ok(event) = receiver.recv() {
101 match event {
102 SearchEvent::Search { query, paths } => {
103 for path in paths {
104 if let Ok(results) =
105 Self::search_file(&path, &query, &config, &fuzzy_matcher)
106 {
107 if !results.is_empty() {
108 if let Ok(mut idx) = index.lock() {
109 idx.insert(path, results);
110 }
111 }
112 }
113 }
114 }
115 SearchEvent::FileChanged(path) => {
116 if let Ok(mut idx) = index.lock() {
118 idx.remove(&path);
119 }
120 }
121 SearchEvent::Stop => break,
122 }
123 }
124 }
125
126 fn search_file(
127 path: &Path,
128 query: &str,
129 config: &SearchConfig,
130 matcher: &SkimMatcherV2,
131 ) -> Result<Vec<SearchResult>> {
132 let start = Instant::now();
133 let mut results = Vec::new();
134
135 let metadata = fs::metadata(path)?;
137 if metadata.len() > config.max_file_size_mb * 1024 * 1024 {
138 return Ok(results); }
140
141 let file = File::open(path)?;
142 let reader = BufReader::new(file);
143
144 let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("");
145 let file_type = Self::detect_file_type(ext);
146
147 let max_lines = match ext {
149 "jsonl" => config.max_lines_per_file,
150 "log" => config.max_lines_per_file,
151 _ => config.max_lines_per_file * 5, };
153
154 let mut line_number = 0;
155 let mut lines_buffer: Vec<String> = Vec::with_capacity(5);
156
157 for line_result in reader.lines() {
158 if start.elapsed().as_millis() > config.search_timeout_ms as u128 {
160 break;
161 }
162
163 line_number += 1;
164 if line_number > max_lines {
165 if config.smart_sampling {
166 if line_number % 10 != 0 {
168 continue;
169 }
170 } else {
171 break;
172 }
173 }
174
175 if let Ok(line) = line_result {
176 lines_buffer.push(line.clone());
178 if lines_buffer.len() > 5 {
179 lines_buffer.remove(0);
180 }
181
182 if let Some(score) = matcher.fuzzy_match(&line, query) {
184 if score >= config.fuzzy_threshold {
185 let content = if ext == "jsonl" {
187 Self::extract_jsonl_content(&line).unwrap_or(line.clone())
188 } else {
189 line.clone()
190 };
191
192 results.push(SearchResult {
193 file_path: path.to_path_buf(),
194 line_number,
195 content,
196 score,
197 context: lines_buffer.clone(),
198 file_type: file_type.clone(),
199 timestamp: std::time::SystemTime::now(),
200 });
201 }
202 }
203
204 if line.to_lowercase().contains(&query.to_lowercase()) {
206 let content = if ext == "jsonl" {
207 Self::extract_jsonl_content(&line).unwrap_or(line.clone())
208 } else {
209 line.clone()
210 };
211
212 results.push(SearchResult {
213 file_path: path.to_path_buf(),
214 line_number,
215 content,
216 score: 100, context: lines_buffer.clone(),
218 file_type: file_type.clone(),
219 timestamp: std::time::SystemTime::now(),
220 });
221 }
222 }
223 }
224
225 Ok(results)
226 }
227
228 fn extract_jsonl_content(line: &str) -> Option<String> {
229 if let Ok(json) = serde_json::from_str::<Value>(line) {
231 let mut parts = Vec::new();
233
234 if let Some(msg) = json.get("message").and_then(|v| v.as_str()) {
235 parts.push(msg.to_string());
236 }
237 if let Some(prompt) = json.get("prompt").and_then(|v| v.as_str()) {
238 parts.push(format!("Prompt: {}", prompt));
239 }
240 if let Some(response) = json.get("response").and_then(|v| v.as_str()) {
241 parts.push(format!("Response: {}", response));
242 }
243 if let Some(content) = json.get("content").and_then(|v| v.as_str()) {
244 parts.push(content.to_string());
245 }
246
247 if !parts.is_empty() {
248 return Some(parts.join(" | "));
249 }
250 }
251 None
252 }
253
/// Maps a file extension (without the dot, case-sensitive) to the label
/// stored in `SearchResult::file_type`. Unrecognised extensions, including
/// the empty string, map to `"unknown"`.
fn detect_file_type(ext: &str) -> String {
    let label = match ext {
        "json" => "json",
        "jsonl" => "jsonl_stream",
        "md" | "markdown" => "markdown",
        "log" => "log_file",
        "txt" => "text_file",
        _ => "unknown",
    };
    label.to_string()
}
264
265 pub async fn search(&self, query: &str, paths: Vec<PathBuf>) -> Vec<SearchResult> {
266 let _ = self.sender.send(SearchEvent::Search {
268 query: query.to_string(),
269 paths: paths.clone(),
270 });
271
272 thread::sleep(Duration::from_millis(self.config.search_timeout_ms));
274
275 let mut all_results = Vec::new();
277 if let Ok(idx) = self.search_index.lock() {
278 for path in paths {
279 if let Some(results) = idx.get(&path) {
280 all_results.extend(results.clone());
281 }
282 }
283 }
284
285 all_results.sort_by(|a, b| b.score.cmp(&a.score));
287 all_results
288 }
289
290 pub fn start_watching(&mut self, watch_paths: Vec<PathBuf>) -> Result<()> {
291 let sender = self.sender.clone();
292 let config = self.config.clone();
293
294 let mut watcher = RecommendedWatcher::new(
295 move |res: Result<Event, notify::Error>| {
296 if let Ok(event) = res {
297 if matches!(event.kind, EventKind::Create(_) | EventKind::Modify(_)) {
298 for path in event.paths {
299 if let Some(ext) = path.extension().and_then(|s| s.to_str()) {
301 let should_watch = config.watch_patterns.iter().any(|pattern| {
302 pattern.ends_with(&format!("*.{}", ext))
303 || pattern == &format!("*.{}", ext)
304 });
305
306 if should_watch {
307 println!("🔍 File changed, re-indexing: {}", path.display());
308 let _ = sender.send(SearchEvent::FileChanged(path));
309 }
310 }
311 }
312 }
313 }
314 },
315 Config::default(),
316 )?;
317
318 for path in &watch_paths {
320 if path.exists() {
321 watcher.watch(path, RecursiveMode::Recursive)?;
322 println!("👁️ Watching for changes in: {}", path.display());
323 }
324 }
325
326 self.watcher = Some(watcher);
327
328 self.initial_index(watch_paths)?;
330
331 Ok(())
332 }
333
334 fn initial_index(&self, watch_paths: Vec<PathBuf>) -> Result<()> {
335 println!("🔍 Initial indexing of watched directories...");
336
337 for watch_path in watch_paths {
338 if watch_path.is_dir() {
339 for pattern in &self.config.watch_patterns {
341 let glob_pattern = format!("{}/{}", watch_path.display(), pattern);
342 if let Ok(paths) = glob::glob(&glob_pattern) {
343 let files: Vec<PathBuf> = paths
344 .filter_map(|p| p.ok())
345 .filter(|p| p.is_file())
346 .collect();
347
348 if !files.is_empty() {
349 println!(" Found {} {} files", files.len(), pattern);
350 let _ = self.sender.send(SearchEvent::Search {
352 query: String::new(), paths: files,
354 });
355 }
356 }
357 }
358 }
359 }
360
361 println!("✅ Initial indexing complete!");
362 Ok(())
363 }
364
365 pub fn get_cached_results(&self, path: &Path) -> Vec<SearchResult> {
366 if let Ok(idx) = self.search_index.lock() {
367 idx.get(path).cloned().unwrap_or_default()
368 } else {
369 Vec::new()
370 }
371 }
372
373 pub fn clear_cache(&self) {
374 if let Ok(mut idx) = self.search_index.lock() {
375 idx.clear();
376 }
377 }
378}
379
380pub async fn handle_smart_search(params: Value) -> Result<Value> {
382 let query = params["query"]
383 .as_str()
384 .ok_or_else(|| anyhow::anyhow!("Missing query parameter"))?;
385
386 let paths: Vec<PathBuf> = params["paths"]
387 .as_array()
388 .map(|arr| {
389 arr.iter()
390 .filter_map(|v| v.as_str())
391 .map(PathBuf::from)
392 .collect()
393 })
394 .unwrap_or_else(|| vec![std::env::current_dir().unwrap_or_default()]);
395
396 let config = SearchConfig::default();
397 let searcher = SmartBackgroundSearcher::new(config)?;
398
399 let results = searcher.search(query, paths).await;
400
401 let formatted: Vec<Value> = results
403 .into_iter()
404 .take(20) .map(|r| {
406 serde_json::json!({
407 "file": r.file_path.to_string_lossy(),
408 "line": r.line_number,
409 "content": r.content,
410 "score": r.score,
411 "type": r.file_type,
412 "context": r.context,
413 })
414 })
415 .collect();
416
417 Ok(serde_json::json!({
418 "results": formatted,
419 "count": formatted.len(),
420 "message": format!("Found {} matches for '{}'", formatted.len(), query)
421 }))
422}