Skip to main content

st/
spicy_fuzzy.rs

// Spicy Fuzzy Search - "Finding needles in haystacks at the speed of thought!" 🔍
// Fuzzy matching with MEM8 context caching for instant recall
3
4use anyhow::Result;
5use bincode;
6use fuzzy_matcher::skim::SkimMatcherV2;
7use fuzzy_matcher::FuzzyMatcher;
8use rayon::prelude::*;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::fs;
12use std::path::{Path, PathBuf};
13use std::sync::Arc;
14use std::time::{SystemTime, UNIX_EPOCH};
15
16use crate::memory_manager::MemoryManager;
17
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct FileMatch {
20    pub path: PathBuf,
21    pub line_number: usize,
22    pub line_content: String,
23    pub score: i64,
24    pub context_before: Vec<String>,
25    pub context_after: Vec<String>,
26    pub match_positions: Vec<usize>, // Character positions of matches
27}
28
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct DirectoryContext {
31    pub path: PathBuf,
32    pub files: Vec<PathBuf>,
33    pub file_contents_hash: HashMap<PathBuf, u64>, // Quick content fingerprints
34    pub last_scan: SystemTime,
35    pub wave_signature: u32, // Quantum wave signature for this directory
36}
37
38pub struct SpicyFuzzySearch {
39    matcher: Arc<SkimMatcherV2>,
40    memory_manager: MemoryManager,
41    context_cache: HashMap<PathBuf, DirectoryContext>,
42}
43
44impl SpicyFuzzySearch {
45    pub fn new() -> Result<Self> {
46        let matcher = Arc::new(SkimMatcherV2::default().smart_case().use_cache(true));
47        let memory_manager = MemoryManager::new()?;
48
49        // Load cached contexts from M8
50        let context_cache = Self::load_contexts_from_m8()?;
51
52        Ok(Self {
53            matcher,
54            memory_manager,
55            context_cache,
56        })
57    }
58
59    /// Fuzzy search across file contents with MEM8 caching
60    pub fn search_content(
61        &mut self,
62        root_path: &Path,
63        query: &str,
64        max_results: usize,
65    ) -> Result<Vec<FileMatch>> {
66        // Check if we have a cached context for this directory
67        let context = self.get_or_create_context(root_path)?;
68
69        // Parallel fuzzy search across all files
70        let matcher = self.matcher.clone();
71        let query = query.to_string();
72
73        let mut all_matches: Vec<FileMatch> = context
74            .files
75            .par_iter()
76            .filter_map(|file_path| Self::search_file(&matcher, file_path, &query).ok())
77            .flatten()
78            .collect();
79
80        // Sort by score (highest first)
81        all_matches.sort_by(|a, b| b.score.cmp(&a.score));
82        all_matches.truncate(max_results);
83
84        // Store search results in MEM8 for pattern learning
85        self.store_search_pattern(&query, &all_matches)?;
86
87        Ok(all_matches)
88    }
89
90    /// Search within a single file
91    fn search_file(matcher: &SkimMatcherV2, path: &Path, query: &str) -> Result<Vec<FileMatch>> {
92        // Skip binary files
93        if Self::is_binary(path)? {
94            return Ok(vec![]);
95        }
96
97        let content = fs::read_to_string(path)?;
98        let lines: Vec<&str> = content.lines().collect();
99        let mut matches = Vec::new();
100
101        for (line_idx, line) in lines.iter().enumerate() {
102            if let Some((score, indices)) = matcher.fuzzy_indices(line, query) {
103                // Get context lines (2 before, 2 after)
104                let context_before = lines
105                    .get(line_idx.saturating_sub(2)..line_idx)
106                    .map(|ls| ls.iter().map(|s| s.to_string()).collect())
107                    .unwrap_or_default();
108
109                let context_after = lines
110                    .get(line_idx + 1..=(line_idx + 2).min(lines.len() - 1))
111                    .map(|ls| ls.iter().map(|s| s.to_string()).collect())
112                    .unwrap_or_default();
113
114                matches.push(FileMatch {
115                    path: path.to_path_buf(),
116                    line_number: line_idx + 1,
117                    line_content: line.to_string(),
118                    score,
119                    context_before,
120                    context_after,
121                    match_positions: indices,
122                });
123            }
124        }
125
126        Ok(matches)
127    }
128
129    /// Fuzzy search file names only
130    pub fn search_filenames(
131        &mut self,
132        root_path: &Path,
133        query: &str,
134        max_results: usize,
135    ) -> Result<Vec<(PathBuf, i64)>> {
136        let context = self.get_or_create_context(root_path)?;
137        let matcher = self.matcher.clone();
138        let query = query.to_string();
139
140        let mut matches: Vec<(PathBuf, i64)> = context
141            .files
142            .par_iter()
143            .filter_map(|path| {
144                let filename = path.file_name()?.to_str()?;
145                matcher
146                    .fuzzy_match(filename, &query)
147                    .map(|score| (path.clone(), score))
148            })
149            .collect();
150
151        matches.sort_by(|a, b| b.1.cmp(&a.1));
152        matches.truncate(max_results);
153
154        Ok(matches)
155    }
156
157    /// Get or create directory context with M8 caching
158    fn get_or_create_context(&mut self, path: &Path) -> Result<DirectoryContext> {
159        // Check cache first
160        if let Some(context) = self.context_cache.get(path) {
161            // Check if cache is still fresh (< 5 minutes old)
162            if context.last_scan.elapsed()?.as_secs() < 300 {
163                return Ok(context.clone());
164            }
165        }
166
167        // Create new context
168        let context = self.scan_directory(path)?;
169
170        // Store in cache and M8
171        self.context_cache
172            .insert(path.to_path_buf(), context.clone());
173        self.save_context_to_m8(&context)?;
174
175        Ok(context)
176    }
177
178    /// Scan directory and create context
179    fn scan_directory(&self, path: &Path) -> Result<DirectoryContext> {
180        let mut files = Vec::new();
181        let mut file_contents_hash = HashMap::new();
182
183        // Walk directory recursively
184        for entry in walkdir::WalkDir::new(path)
185            .max_depth(10)
186            .into_iter()
187            .filter_map(|e| e.ok())
188        {
189            if entry.file_type().is_file() {
190                let file_path = entry.path().to_path_buf();
191
192                // Skip common ignored patterns
193                if Self::should_ignore(&file_path) {
194                    continue;
195                }
196
197                // Calculate content hash for change detection
198                if let Ok(content) = fs::read(&file_path) {
199                    let hash = crc32fast::hash(&content) as u64;
200                    file_contents_hash.insert(file_path.clone(), hash);
201                }
202
203                files.push(file_path);
204            }
205        }
206
207        // Generate quantum wave signature for this directory
208        let wave_signature = self.generate_wave_signature(path);
209
210        Ok(DirectoryContext {
211            path: path.to_path_buf(),
212            files,
213            file_contents_hash,
214            last_scan: SystemTime::now(),
215            wave_signature,
216        })
217    }
218
219    /// Generate a unique wave signature for a directory
220    fn generate_wave_signature(&self, path: &Path) -> u32 {
221        let path_str = path.display().to_string();
222        let hash = crc32fast::hash(path_str.as_bytes());
223
224        // Mix with timestamp for temporal component
225        let now = SystemTime::now()
226            .duration_since(UNIX_EPOCH)
227            .unwrap_or_default()
228            .as_secs() as u32;
229
230        hash.wrapping_add(now) ^ 0xDEADBEEF // Spicy constant!
231    }
232
233    /// Check if file should be ignored
234    fn should_ignore(path: &Path) -> bool {
235        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
236            // Common ignore patterns
237            if name.starts_with('.') && name != ".env" {
238                return true;
239            }
240
241            // Binary and build artifacts
242            matches!(
243                name,
244                "node_modules"
245                    | "target"
246                    | "dist"
247                    | "build"
248                    | "*.pyc"
249                    | "*.pyo"
250                    | "*.so"
251                    | "*.dll"
252                    | "*.exe"
253            )
254        } else {
255            false
256        }
257    }
258
259    /// Quick binary file detection
260    fn is_binary(path: &Path) -> Result<bool> {
261        let mut buffer = [0u8; 512];
262        use std::io::Read;
263        let mut file = fs::File::open(path)?;
264        let bytes_read = file.read(&mut buffer)?;
265
266        // Check for null bytes (common in binary files)
267        Ok(buffer[..bytes_read].contains(&0))
268    }
269
270    /// Save directory context to M8 format
271    fn save_context_to_m8(&self, context: &DirectoryContext) -> Result<()> {
272        let cwd = std::env::current_dir()?;
273        let m8_path = cwd
274            .join(".st")
275            .join("contexts")
276            .join(format!("{:08x}.m8", context.wave_signature));
277
278        // Create directories if needed
279        if let Some(parent) = m8_path.parent() {
280            fs::create_dir_all(parent)?;
281        }
282
283        // Serialize and compress
284        let data = bincode::serialize(context)?;
285        use std::io::Write;
286        let mut encoder = flate2::write::ZlibEncoder::new(Vec::new(), flate2::Compression::best());
287        encoder.write_all(&data)?;
288        let compressed = encoder.finish()?;
289
290        fs::write(m8_path, compressed)?;
291        Ok(())
292    }
293
294    /// Load cached contexts from M8 files
295    fn load_contexts_from_m8() -> Result<HashMap<PathBuf, DirectoryContext>> {
296        let mut contexts = HashMap::new();
297        let cwd = std::env::current_dir()?;
298        let contexts_dir = cwd.join(".st").join("contexts");
299
300        if !contexts_dir.exists() {
301            return Ok(contexts);
302        }
303
304        for entry in fs::read_dir(contexts_dir)? {
305            let entry = entry?;
306            let path = entry.path();
307
308            if path.extension().and_then(|e| e.to_str()) == Some("m8") {
309                if let Ok(compressed) = fs::read(&path) {
310                    // Decompress and deserialize
311                    use std::io::Read;
312                    let mut decoder = flate2::read::ZlibDecoder::new(&compressed[..]);
313                    let mut data = Vec::new();
314
315                    if decoder.read_to_end(&mut data).is_ok() {
316                        if let Ok(context) = bincode::deserialize::<DirectoryContext>(&data) {
317                            contexts.insert(context.path.clone(), context);
318                        }
319                    }
320                }
321            }
322        }
323
324        Ok(contexts)
325    }
326
327    /// Store search patterns for learning
328    fn store_search_pattern(&mut self, query: &str, results: &[FileMatch]) -> Result<()> {
329        // Create a memory anchor for this search pattern
330        let anchor_type = "search_pattern";
331        let keywords = vec![query.to_string()];
332
333        // Create context from top results
334        let context = results
335            .iter()
336            .take(3)
337            .map(|m| format!("{}:{}", m.path.display(), m.line_number))
338            .collect::<Vec<_>>()
339            .join(", ");
340
341        // Use MemoryManager to persist the pattern
342        self.memory_manager
343            .anchor(anchor_type, keywords, &context, "spicy_fuzzy")?;
344        Ok(())
345    }
346
347    /// Get suggested searches based on past patterns
348    pub fn get_suggestions(&mut self, partial_query: &str) -> Vec<String> {
349        self.memory_manager
350            .find(&[partial_query.to_string()])
351            .map(|mems| {
352                mems.into_iter()
353                    .filter_map(|m| m.keywords.first().cloned())
354                    .take(5)
355                    .collect()
356            })
357            .unwrap_or_default()
358    }
359}
360
361/// Integration point for the spicy TUI
362pub fn create_fuzzy_searcher() -> Result<SpicyFuzzySearch> {
363    SpicyFuzzySearch::new()
364}