ck_core/
lib.rs

1use std::path::{Path, PathBuf};
2use serde::{Deserialize, Serialize};
3use thiserror::Error;
4
/// Error type shared by the crate's fallible operations (see the `Result` alias).
///
/// Variants marked `#[from]` get a `From` impl from `thiserror`, so the wrapped
/// library error converts automatically when propagated with `?`. The remaining
/// variants carry a free-form message supplied by whoever constructs them.
#[derive(Debug, Error)]
pub enum CkError {
    /// Filesystem or other I/O failure.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    
    /// Error reported by the `regex` crate.
    #[error("Regex error: {0}")]
    Regex(#[from] regex::Error),
    
    /// Error reported by `bincode` while encoding or decoding.
    #[error("Serialization error: {0}")]
    Serialization(#[from] bincode::Error),
    
    /// Error reported by `serde_json` while encoding or decoding.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),
    
    /// Index-related failure described by the contained message.
    #[error("Index error: {0}")]
    Index(String),
    
    /// Search-related failure described by the contained message.
    #[error("Search error: {0}")]
    Search(String),
    
    /// Embedding-related failure described by the contained message.
    #[error("Embedding error: {0}")]
    Embedding(String),
    
    /// Catch-all for failures that fit none of the other variants.
    #[error("Other error: {0}")]
    Other(String),
}
31
/// Shorthand for `std::result::Result` with the error type fixed to [`CkError`].
pub type Result<T> = std::result::Result<T, CkError>;
33
/// A region of a file expressed both as a byte range and as a line range.
///
/// NOTE(review): this file does not establish whether the end offsets are
/// inclusive or exclusive, nor whether lines are 0- or 1-based (the unit tests
/// use `line_start: 1`) — confirm against the code that produces spans.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Span {
    /// Byte offset where the region starts.
    pub byte_start: usize,
    /// Byte offset where the region ends.
    pub byte_end: usize,
    /// Line on which the region starts.
    pub line_start: usize,
    /// Line on which the region ends.
    pub line_end: usize,
}
41
/// Serializable record describing one file: its path, a content hash, a
/// modification stamp and its size.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path of the file this record describes.
    pub path: PathBuf,
    /// Content hash; presumably the hex string returned by `compute_file_hash` — verify against the writer.
    pub hash: String,
    /// Modification stamp. NOTE(review): unit and epoch are not established here — confirm.
    pub last_modified: u64,
    /// File size; presumably in bytes — confirm against the writer.
    pub size: u64,
}
49
/// A single search hit: the file, the matched region, a score and a text preview.
///
/// `lang` and `symbol` are left out of the serialized output entirely when they
/// are `None`.
///
/// NOTE(review): `skip_serializing_if` makes the number of encoded fields vary,
/// which non-self-describing formats such as bincode cannot round-trip —
/// confirm this type is only ever serialized to self-describing formats (JSON).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// File in which the hit was found.
    pub file: PathBuf,
    /// Location of the hit inside `file`.
    pub span: Span,
    /// Score assigned to the hit; scale and ordering are defined by the producer.
    pub score: f32,
    /// Text shown to the user for this hit.
    pub preview: String,
    /// Optional language tag; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lang: Option<String>,
    /// Optional symbol name; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub symbol: Option<String>,
}
61
/// Search hit in the shape used for serialized (JSON) output.
///
/// Differs from `SearchResult` in that `file` is a plain `String`, the ranking
/// `signals` and the `model` name are included, and `lang` / `symbol` carry no
/// `skip_serializing_if`, so they are emitted even when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonSearchResult {
    /// File path rendered as a string.
    pub file: String,
    /// Location of the hit inside `file`.
    pub span: Span,
    /// Optional language tag.
    pub lang: Option<String>,
    /// Optional symbol name.
    pub symbol: Option<String>,
    /// Score assigned to the hit.
    pub score: f32,
    /// Per-ranker details behind `score`.
    pub signals: SearchSignals,
    /// Text shown to the user for this hit.
    pub preview: String,
    /// Model identifier; presumably the embedding model (tests use "bge-small") — confirm.
    pub model: String,
}
73
/// Ranking details attached to a `JsonSearchResult`.
///
/// NOTE(review): the names suggest lexical rank, vector rank and a reciprocal
/// rank fusion score, but the ranking code is not in this file — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchSignals {
    /// Rank from the lexical ranker, if it produced one.
    pub lex_rank: Option<usize>,
    /// Rank from the vector ranker, if it produced one.
    pub vec_rank: Option<usize>,
    /// Combined score (presumably reciprocal rank fusion).
    pub rrf_score: f32,
}
80
/// Search strategy selected through `SearchOptions::mode`.
///
/// The enum is fieldless, so it derives `Copy`, `Eq` and `Hash` alongside the
/// original `Debug`, `Clone` and `PartialEq`: values can be passed by value,
/// compared with full equality semantics and used as map or set keys.
/// Existing `.clone()` calls and `==` comparisons keep working unchanged.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SearchMode {
    /// Regular-expression search; the mode `SearchOptions::default()` selects.
    Regex,
    /// Lexical search (the implementation lives outside this file).
    Lexical,
    /// Semantic search (the implementation lives outside this file).
    Semantic,
    /// Hybrid search (the implementation lives outside this file).
    Hybrid,
}
88
/// Full set of knobs for one search invocation.
///
/// NOTE(review): this file only establishes the field types and their defaults
/// (see the `Default` impl). The per-field descriptions below are inferred from
/// the names, many of which mirror grep-style flags — confirm against the code
/// that consumes these options.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Search strategy to use.
    pub mode: SearchMode,
    /// Pattern or query text.
    pub query: String,
    /// File or directory to search.
    pub path: PathBuf,
    /// Optional cap on the number of results.
    pub top_k: Option<usize>,
    /// Optional score cut-off.
    pub threshold: Option<f32>,
    /// Ignore letter case when matching.
    pub case_insensitive: bool,
    /// Match whole words only.
    pub whole_word: bool,
    /// Treat the query as a literal string rather than a pattern.
    pub fixed_string: bool,
    /// Show line numbers in the output.
    pub line_numbers: bool,
    /// Number of context lines around each match.
    pub context_lines: usize,
    /// Number of context lines before each match.
    pub before_context_lines: usize,
    /// Number of context lines after each match.
    pub after_context_lines: usize,
    /// Descend into subdirectories.
    pub recursive: bool,
    /// Emit results as JSON.
    pub json_output: bool,
    /// Rebuild the index before searching.
    pub reindex: bool,
    /// Show scores in the output.
    pub show_scores: bool,
    /// Show file names in the output.
    pub show_filenames: bool,
    /// List only files that contain a match.
    pub files_with_matches: bool,
    /// List only files that contain no match.
    pub files_without_matches: bool,
    /// Names or patterns to skip; defaults to `get_default_exclude_patterns()`.
    pub exclude_patterns: Vec<String>,
    /// Honour `.gitignore` rules while walking.
    pub respect_gitignore: bool,
    /// Return the whole enclosing section rather than just the matching lines.
    pub full_section: bool,
}
114
115impl Default for SearchOptions {
116    fn default() -> Self {
117        Self {
118            mode: SearchMode::Regex,
119            query: String::new(),
120            path: PathBuf::from("."),
121            top_k: None,
122            threshold: None,
123            case_insensitive: false,
124            whole_word: false,
125            fixed_string: false,
126            line_numbers: false,
127            context_lines: 0,
128            before_context_lines: 0,
129            after_context_lines: 0,
130            recursive: true,
131            json_output: false,
132            reindex: false,
133            show_scores: false,
134            show_filenames: false,
135            files_with_matches: false,
136            files_without_matches: false,
137            exclude_patterns: get_default_exclude_patterns(),
138            respect_gitignore: true,
139            full_section: false,
140        }
141    }
142}
143
/// Returns the directory names that a search skips by default.
///
/// The list covers common cache, build, tooling and scratch directories that
/// rarely hold user code. A fresh `Vec` is allocated on every call, and the
/// entries always appear in the order of the table below.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck's own index directory
        ".ck",
        // AI/ML model cache directories
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build directories
        "target",       // Rust
        "build",        // Various
        "dist",         // JavaScript/Python
        "node_modules", // JavaScript
        ".gradle",      // Java
        ".mvn",         // Maven
        "bin",          // Various
        "obj",          // .NET
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // IDE/Editor directories
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|name| String::from(*name))
        .collect()
}
189
/// Maps a source file to the path of its sidecar file under `<repo_root>/.ck`.
///
/// The sidecar mirrors the file's location relative to `repo_root` and has
/// `.ck` appended to the complete file name, e.g.
/// `<root>/src/main.rs` -> `<root>/.ck/src/main.rs.ck` and
/// `<root>/README` -> `<root>/.ck/README.ck`.
///
/// * `repo_root` - directory that owns the `.ck` index directory.
/// * `file_path` - the source file. If it does not start with `repo_root` it is
///   used as given; an absolute path then has its prefix/root components dropped
///   so the sidecar is re-rooted under `.ck` instead of landing beside the
///   source file.
///
/// `..` components are not normalised, so a relative path that climbs above the
/// root can still resolve outside `.ck`. No filesystem access is performed.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

    let mut sidecar = repo_root.join(".ck");
    // `PathBuf::push` replaces the whole buffer when given an absolute path, which
    // used to discard the `.ck` directory for files outside the repository.
    // Appending only the non-root components keeps the result under `.ck`.
    sidecar.extend(relative.components().filter(|part| {
        !matches!(
            part,
            std::path::Component::Prefix(_) | std::path::Component::RootDir
        )
    }));

    // Append ".ck" to the full file name as an `OsString`. For valid UTF-8 names
    // this equals the previous "<ext>.ck" extension swap, but it no longer goes
    // through `to_string_lossy`, so non-UTF-8 names are preserved exactly.
    if let Some(mut name) = sidecar.file_name().map(|n| n.to_os_string()) {
        name.push(".ck");
        sidecar.set_file_name(name);
    }
    sidecar
}
201
202pub fn compute_file_hash(path: &Path) -> Result<String> {
203    let data = std::fs::read(path)?;
204    let hash = blake3::hash(&data);
205    Ok(hash.to_hex().to_string())
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A `Span` stores exactly the offsets it was built with.
    #[test]
    fn test_span_creation() {
        let span = Span {
            byte_start: 0,
            byte_end: 10,
            line_start: 1,
            line_end: 2,
        };

        assert_eq!(span.byte_start, 0);
        assert_eq!(span.byte_end, 10);
        assert_eq!(span.line_start, 1);
        assert_eq!(span.line_end, 2);
    }

    /// Pins every field of `SearchOptions::default()`, so a changed default
    /// cannot slip through unnoticed.
    #[test]
    fn test_search_options_default() {
        let options = SearchOptions::default();
        assert!(matches!(options.mode, SearchMode::Regex));
        assert_eq!(options.query, "");
        assert_eq!(options.path, PathBuf::from("."));
        assert_eq!(options.top_k, None);
        assert_eq!(options.threshold, None);
        assert!(!options.case_insensitive);
        assert!(!options.whole_word);
        assert!(!options.fixed_string);
        assert!(!options.line_numbers);
        assert_eq!(options.context_lines, 0);
        assert_eq!(options.before_context_lines, 0);
        assert_eq!(options.after_context_lines, 0);
        assert!(options.recursive);
        assert!(!options.json_output);
        assert!(!options.reindex);
        assert!(!options.show_scores);
        assert!(!options.show_filenames);
        assert!(!options.files_with_matches);
        assert!(!options.files_without_matches);
        assert_eq!(options.exclude_patterns, get_default_exclude_patterns());
        assert!(options.respect_gitignore);
        assert!(!options.full_section);
    }

    /// `FileMetadata` survives a JSON round trip unchanged.
    #[test]
    fn test_file_metadata_serialization() {
        let metadata = FileMetadata {
            path: PathBuf::from("test.txt"),
            hash: "abc123".to_string(),
            last_modified: 1234567890,
            size: 1024,
        };

        let json = serde_json::to_string(&metadata).unwrap();
        let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();

        assert_eq!(metadata.path, deserialized.path);
        assert_eq!(metadata.hash, deserialized.hash);
        assert_eq!(metadata.last_modified, deserialized.last_modified);
        assert_eq!(metadata.size, deserialized.size);
    }

    /// `SearchResult` survives a JSON round trip unchanged.
    #[test]
    fn test_search_result_serialization() {
        let result = SearchResult {
            file: PathBuf::from("test.txt"),
            span: Span {
                byte_start: 0,
                byte_end: 10,
                line_start: 1,
                line_end: 1,
            },
            score: 0.95,
            preview: "hello world".to_string(),
            lang: Some("rust".to_string()),
            symbol: Some("main".to_string()),
        };

        let json = serde_json::to_string(&result).unwrap();
        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.preview, deserialized.preview);
        assert_eq!(result.lang, deserialized.lang);
        assert_eq!(result.symbol, deserialized.symbol);
    }

    /// A file with an extension gets `.ck` appended after that extension.
    #[test]
    fn test_get_sidecar_path() {
        let repo_root = PathBuf::from("/home/user/project");
        let file_path = PathBuf::from("/home/user/project/src/main.rs");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");

        assert_eq!(sidecar, expected);
    }

    /// A file without an extension simply gains `.ck`.
    #[test]
    fn test_get_sidecar_path_no_extension() {
        let repo_root = PathBuf::from("/project");
        let file_path = PathBuf::from("/project/README");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/project/.ck/README.ck");

        assert_eq!(sidecar, expected);
    }

    /// Hashing is deterministic for equal content and changes with the content.
    #[test]
    fn test_compute_file_hash() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");

        fs::write(&file_path, "hello world").unwrap();

        let hash1 = compute_file_hash(&file_path).unwrap();
        let hash2 = compute_file_hash(&file_path).unwrap();

        // Same content should produce same hash
        assert_eq!(hash1, hash2);
        assert!(!hash1.is_empty());

        // Different content should produce different hash
        fs::write(&file_path, "hello rust").unwrap();
        let hash3 = compute_file_hash(&file_path).unwrap();
        assert_ne!(hash1, hash3);
    }

    /// Hashing a missing file reports an error instead of panicking.
    #[test]
    fn test_compute_file_hash_nonexistent() {
        // Look inside a fresh temporary directory so the outcome does not depend
        // on what happens to exist in the current working directory.
        let temp_dir = TempDir::new().unwrap();
        let missing = temp_dir.path().join("nonexistent.txt");

        let result = compute_file_hash(&missing);
        assert!(result.is_err());
    }

    /// `JsonSearchResult` (including its nested signals) survives a JSON round trip.
    #[test]
    fn test_json_search_result_serialization() {
        let signals = SearchSignals {
            lex_rank: Some(1),
            vec_rank: Some(2),
            rrf_score: 0.85,
        };

        let result = JsonSearchResult {
            file: "test.txt".to_string(),
            span: Span {
                byte_start: 0,
                byte_end: 5,
                line_start: 1,
                line_end: 1,
            },
            lang: Some("txt".to_string()),
            symbol: None,
            score: 0.95,
            signals,
            preview: "hello".to_string(),
            model: "bge-small".to_string(),
        };

        let json = serde_json::to_string(&result).unwrap();
        let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
        assert_eq!(result.model, deserialized.model);
    }
}
373}