ck_core/
lib.rs

1use std::path::{Path, PathBuf};
2use serde::{Deserialize, Serialize};
3use thiserror::Error;
4
5#[derive(Debug, Error)]
6pub enum CkError {
7    #[error("IO error: {0}")]
8    Io(#[from] std::io::Error),
9    
10    #[error("Regex error: {0}")]
11    Regex(#[from] regex::Error),
12    
13    #[error("Serialization error: {0}")]
14    Serialization(#[from] bincode::Error),
15    
16    #[error("JSON error: {0}")]
17    Json(#[from] serde_json::Error),
18    
19    #[error("Index error: {0}")]
20    Index(String),
21    
22    #[error("Search error: {0}")]
23    Search(String),
24    
25    #[error("Embedding error: {0}")]
26    Embedding(String),
27    
28    #[error("Other error: {0}")]
29    Other(String),
30}
31
32pub type Result<T> = std::result::Result<T, CkError>;
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct Span {
36    pub byte_start: usize,
37    pub byte_end: usize,
38    pub line_start: usize,
39    pub line_end: usize,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct FileMetadata {
44    pub path: PathBuf,
45    pub hash: String,
46    pub last_modified: u64,
47    pub size: u64,
48}
49
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct SearchResult {
52    pub file: PathBuf,
53    pub span: Span,
54    pub score: f32,
55    pub preview: String,
56    #[serde(skip_serializing_if = "Option::is_none")]
57    pub lang: Option<String>,
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub symbol: Option<String>,
60}
61
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct JsonSearchResult {
64    pub file: String,
65    pub span: Span,
66    pub lang: Option<String>,
67    pub symbol: Option<String>,
68    pub score: f32,
69    pub signals: SearchSignals,
70    pub preview: String,
71    pub model: String,
72}
73
74#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct SearchSignals {
76    pub lex_rank: Option<usize>,
77    pub vec_rank: Option<usize>,
78    pub rrf_score: f32,
79}
80
/// Selects which search strategy a [`SearchOptions`] value requests.
// NOTE(review): the per-variant descriptions are inferred from the variant
// names; the engines themselves live outside this file.
#[derive(Clone, Debug, PartialEq)]
pub enum SearchMode {
    /// Regular-expression search; the mode chosen by `SearchOptions::default()`.
    Regex,
    /// Lexical (term-based) search.
    Lexical,
    /// Semantic (embedding-based) search.
    Semantic,
    /// Combination of lexical and semantic search.
    Hybrid,
}
88
89#[derive(Debug, Clone)]
90pub struct SearchOptions {
91    pub mode: SearchMode,
92    pub query: String,
93    pub path: PathBuf,
94    pub top_k: Option<usize>,
95    pub threshold: Option<f32>,
96    pub case_insensitive: bool,
97    pub whole_word: bool,
98    pub fixed_string: bool,
99    pub line_numbers: bool,
100    pub context_lines: usize,
101    pub before_context_lines: usize,
102    pub after_context_lines: usize,
103    pub recursive: bool,
104    pub json_output: bool,
105    pub reindex: bool,
106    pub show_scores: bool,
107    pub show_filenames: bool,
108    pub files_with_matches: bool,
109    pub files_without_matches: bool,
110    pub exclude_patterns: Vec<String>,
111    pub full_section: bool,
112}
113
114impl Default for SearchOptions {
115    fn default() -> Self {
116        Self {
117            mode: SearchMode::Regex,
118            query: String::new(),
119            path: PathBuf::from("."),
120            top_k: None,
121            threshold: None,
122            case_insensitive: false,
123            whole_word: false,
124            fixed_string: false,
125            line_numbers: false,
126            context_lines: 0,
127            before_context_lines: 0,
128            after_context_lines: 0,
129            recursive: true,
130            json_output: false,
131            reindex: false,
132            show_scores: false,
133            show_filenames: false,
134            files_with_matches: false,
135            files_without_matches: false,
136            exclude_patterns: get_default_exclude_patterns(),
137            full_section: false,
138        }
139    }
140}
141
/// Returns the names that are excluded from a search by default.
///
/// The list covers common cache, build, and system directories that rarely
/// contain user code. A fresh `Vec` is allocated on every call, and the order
/// of the entries is fixed.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck's own index directory
        ".ck",
        // AI/ML model cache directories
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build directories
        "target",       // Rust
        "build",        // Various
        "dist",         // JavaScript/Python
        "node_modules", // JavaScript
        ".gradle",      // Java
        ".mvn",         // Maven
        "bin",          // Various
        "obj",          // .NET
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // IDE/Editor directories
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|name| (*name).to_string())
        .collect()
}
187
/// Maps a source file to the path of its sidecar file under `<repo_root>/.ck`.
///
/// The file's path relative to `repo_root` is mirrored beneath the `.ck`
/// directory and `.ck` is appended to the file name:
///
/// * `<root>/src/main.rs` -> `<root>/.ck/src/main.rs.ck`
/// * `<root>/README` -> `<root>/.ck/README.ck`
///
/// The extension is handled as an `OsString`, so a file name whose extension is
/// not valid UTF-8 keeps its exact bytes in the sidecar name instead of having
/// them replaced by U+FFFD.
///
/// If `file_path` does not start with `repo_root`, the whole `file_path` is
/// used as the relative part. Because `PathBuf::push` replaces the base when it
/// is given an absolute path, an absolute `file_path` outside `repo_root`
/// yields `<file_path>.ck` rather than a path inside `.ck`.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

    // `set_extension` replaces the existing extension, so the new one is
    // "<old>.ck" to keep the original extension visible in the sidecar name.
    let sidecar_ext = match relative.extension() {
        Some(ext) => {
            let mut ext = ext.to_os_string();
            ext.push(".ck");
            ext
        }
        None => std::ffi::OsString::from("ck"),
    };

    let mut sidecar = repo_root.join(".ck");
    sidecar.push(relative);
    sidecar.set_extension(sidecar_ext);
    sidecar
}
199
200pub fn compute_file_hash(path: &Path) -> Result<String> {
201    let data = std::fs::read(path)?;
202    let hash = blake3::hash(&data);
203    Ok(hash.to_hex().to_string())
204}
205
/// Unit tests for the data types and helper functions defined in this module.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Small helper so the serialization tests share one span literal.
    fn sample_span(byte_end: usize) -> Span {
        Span {
            byte_start: 0,
            byte_end,
            line_start: 1,
            line_end: 1,
        }
    }

    #[test]
    fn test_span_creation() {
        let span = Span {
            byte_start: 0,
            byte_end: 10,
            line_start: 1,
            line_end: 2,
        };

        assert_eq!(span.byte_start, 0);
        assert_eq!(span.byte_end, 10);
        assert_eq!(span.line_start, 1);
        assert_eq!(span.line_end, 2);
    }

    #[test]
    fn test_search_options_default() {
        let options = SearchOptions::default();
        assert!(matches!(options.mode, SearchMode::Regex));
        assert_eq!(options.query, "");
        assert_eq!(options.path, PathBuf::from("."));
        assert_eq!(options.top_k, None);
        assert_eq!(options.threshold, None);
        assert!(!options.case_insensitive);
        assert!(!options.whole_word);
        assert!(!options.fixed_string);
        assert!(!options.line_numbers);
        assert_eq!(options.context_lines, 0);
        assert_eq!(options.before_context_lines, 0);
        assert_eq!(options.after_context_lines, 0);
        assert!(options.recursive);
        assert!(!options.json_output);
        assert!(!options.reindex);
        assert!(!options.show_scores);
        assert!(!options.show_filenames);
        assert!(!options.files_with_matches);
        assert!(!options.files_without_matches);
        assert!(!options.full_section);
        assert_eq!(options.exclude_patterns, get_default_exclude_patterns());
    }

    #[test]
    fn test_default_exclude_patterns() {
        let patterns = get_default_exclude_patterns();
        assert!(!patterns.is_empty());

        for expected in &[".ck", ".git", "target", "node_modules"] {
            assert!(
                patterns.iter().any(|p| p.as_str() == *expected),
                "missing default exclude pattern: {}",
                expected
            );
        }
    }

    #[test]
    fn test_file_metadata_serialization() {
        let metadata = FileMetadata {
            path: PathBuf::from("test.txt"),
            hash: "abc123".to_string(),
            last_modified: 1234567890,
            size: 1024,
        };

        let json = serde_json::to_string(&metadata).unwrap();
        let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();

        assert_eq!(metadata.path, deserialized.path);
        assert_eq!(metadata.hash, deserialized.hash);
        assert_eq!(metadata.last_modified, deserialized.last_modified);
        assert_eq!(metadata.size, deserialized.size);
    }

    #[test]
    fn test_search_result_serialization() {
        let result = SearchResult {
            file: PathBuf::from("test.txt"),
            span: sample_span(10),
            score: 0.95,
            preview: "hello world".to_string(),
            lang: Some("rust".to_string()),
            symbol: Some("main".to_string()),
        };

        let json = serde_json::to_string(&result).unwrap();
        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_eq!(result.span.byte_start, deserialized.span.byte_start);
        assert_eq!(result.span.byte_end, deserialized.span.byte_end);
        assert_eq!(result.span.line_start, deserialized.span.line_start);
        assert_eq!(result.span.line_end, deserialized.span.line_end);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.preview, deserialized.preview);
        assert_eq!(result.lang, deserialized.lang);
        assert_eq!(result.symbol, deserialized.symbol);
    }

    #[test]
    fn test_search_result_omits_none_fields() {
        let result = SearchResult {
            file: PathBuf::from("test.txt"),
            span: sample_span(10),
            score: 0.5,
            preview: "hello world".to_string(),
            lang: None,
            symbol: None,
        };

        let json = serde_json::to_string(&result).unwrap();

        // `skip_serializing_if` must drop the keys rather than emit `null`.
        assert!(!json.contains("\"lang\""));
        assert!(!json.contains("\"symbol\""));

        // Missing optional keys must still deserialize to `None`.
        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.lang, None);
        assert_eq!(deserialized.symbol, None);
    }

    #[test]
    fn test_get_sidecar_path() {
        let repo_root = PathBuf::from("/home/user/project");
        let file_path = PathBuf::from("/home/user/project/src/main.rs");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");

        assert_eq!(sidecar, expected);
    }

    #[test]
    fn test_get_sidecar_path_no_extension() {
        let repo_root = PathBuf::from("/project");
        let file_path = PathBuf::from("/project/README");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/project/.ck/README.ck");

        assert_eq!(sidecar, expected);
    }

    #[test]
    fn test_get_sidecar_path_multiple_dots() {
        let sidecar = get_sidecar_path(
            Path::new("/project"),
            Path::new("/project/dist/archive.tar.gz"),
        );

        // Only `.ck` is appended; the existing extensions are kept intact.
        assert_eq!(
            sidecar,
            PathBuf::from("/project/.ck/dist/archive.tar.gz.ck")
        );
    }

    #[test]
    fn test_compute_file_hash() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");

        fs::write(&file_path, "hello world").unwrap();

        let hash1 = compute_file_hash(&file_path).unwrap();
        let hash2 = compute_file_hash(&file_path).unwrap();

        // Same content should produce same hash
        assert_eq!(hash1, hash2);
        assert!(!hash1.is_empty());
        // A BLAKE3 digest is 32 bytes, i.e. 64 hex characters.
        assert_eq!(hash1.len(), 64);

        // Different content should produce different hash
        fs::write(&file_path, "hello rust").unwrap();
        let hash3 = compute_file_hash(&file_path).unwrap();
        assert_ne!(hash1, hash3);
    }

    #[test]
    fn test_compute_file_hash_nonexistent() {
        // Use a path inside a fresh temp dir so the result does not depend on
        // what happens to exist in the current working directory.
        let temp_dir = TempDir::new().unwrap();
        let missing = temp_dir.path().join("nonexistent.txt");

        let result = compute_file_hash(&missing);
        assert!(matches!(result, Err(CkError::Io(_))));
    }

    #[test]
    fn test_json_search_result_serialization() {
        let signals = SearchSignals {
            lex_rank: Some(1),
            vec_rank: Some(2),
            rrf_score: 0.85,
        };

        let result = JsonSearchResult {
            file: "test.txt".to_string(),
            span: sample_span(5),
            lang: Some("txt".to_string()),
            symbol: None,
            score: 0.95,
            signals,
            preview: "hello".to_string(),
            model: "bge-small".to_string(),
        };

        let json = serde_json::to_string(&result).unwrap();
        let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_eq!(result.lang, deserialized.lang);
        assert_eq!(result.symbol, deserialized.symbol);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.signals.lex_rank, deserialized.signals.lex_rank);
        assert_eq!(result.signals.vec_rank, deserialized.signals.vec_rank);
        assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
        assert_eq!(result.preview, deserialized.preview);
        assert_eq!(result.model, deserialized.model);
    }
}