ck_core/
lib.rs

1use serde::{Deserialize, Serialize};
2use std::path::{Path, PathBuf};
3use thiserror::Error;
4
5#[derive(Debug, Error)]
6pub enum CkError {
7    #[error("IO error: {0}")]
8    Io(#[from] std::io::Error),
9
10    #[error("Regex error: {0}")]
11    Regex(#[from] regex::Error),
12
13    #[error("Serialization error: {0}")]
14    Serialization(#[from] bincode::Error),
15
16    #[error("JSON error: {0}")]
17    Json(#[from] serde_json::Error),
18
19    #[error("Index error: {0}")]
20    Index(String),
21
22    #[error("Search error: {0}")]
23    Search(String),
24
25    #[error("Embedding error: {0}")]
26    Embedding(String),
27
28    #[error("Other error: {0}")]
29    Other(String),
30}
31
32pub type Result<T> = std::result::Result<T, CkError>;
33
34#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
35pub enum Language {
36    Rust,
37    Python,
38    JavaScript,
39    TypeScript,
40    Haskell,
41    Go,
42    Java,
43    C,
44    Cpp,
45    CSharp,
46    Ruby,
47    Php,
48    Swift,
49    Kotlin,
50}
51
52impl Language {
53    pub fn from_extension(ext: &str) -> Option<Self> {
54        match ext {
55            "rs" => Some(Language::Rust),
56            "py" => Some(Language::Python),
57            "js" => Some(Language::JavaScript),
58            "ts" | "tsx" => Some(Language::TypeScript),
59            "hs" | "lhs" => Some(Language::Haskell),
60            "go" => Some(Language::Go),
61            "java" => Some(Language::Java),
62            "c" => Some(Language::C),
63            "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
64            "h" | "hpp" => Some(Language::Cpp), // Assume C++ for headers
65            "cs" => Some(Language::CSharp),
66            "rb" => Some(Language::Ruby),
67            "php" => Some(Language::Php),
68            "swift" => Some(Language::Swift),
69            "kt" | "kts" => Some(Language::Kotlin),
70            _ => None,
71        }
72    }
73
74    pub fn from_path(path: &Path) -> Option<Self> {
75        path.extension()
76            .and_then(|ext| ext.to_str())
77            .and_then(Self::from_extension)
78    }
79}
80
81impl std::fmt::Display for Language {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let name = match self {
84            Language::Rust => "rust",
85            Language::Python => "python",
86            Language::JavaScript => "javascript",
87            Language::TypeScript => "typescript",
88            Language::Haskell => "haskell",
89            Language::Go => "go",
90            Language::Java => "java",
91            Language::C => "c",
92            Language::Cpp => "cpp",
93            Language::CSharp => "csharp",
94            Language::Ruby => "ruby",
95            Language::Php => "php",
96            Language::Swift => "swift",
97            Language::Kotlin => "kotlin",
98        };
99        write!(f, "{}", name)
100    }
101}
102
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct Span {
105    pub byte_start: usize,
106    pub byte_end: usize,
107    pub line_start: usize,
108    pub line_end: usize,
109}
110
111#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct FileMetadata {
113    pub path: PathBuf,
114    pub hash: String,
115    pub last_modified: u64,
116    pub size: u64,
117}
118
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct SearchResult {
121    pub file: PathBuf,
122    pub span: Span,
123    pub score: f32,
124    pub preview: String,
125    #[serde(skip_serializing_if = "Option::is_none")]
126    pub lang: Option<Language>,
127    #[serde(skip_serializing_if = "Option::is_none")]
128    pub symbol: Option<String>,
129}
130
131#[derive(Debug, Clone, Serialize, Deserialize)]
132pub struct JsonSearchResult {
133    pub file: String,
134    pub span: Span,
135    pub lang: Option<Language>,
136    pub symbol: Option<String>,
137    pub score: f32,
138    pub signals: SearchSignals,
139    pub preview: String,
140    pub model: String,
141}
142
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct SearchSignals {
145    pub lex_rank: Option<usize>,
146    pub vec_rank: Option<usize>,
147    pub rrf_score: f32,
148}
149
/// Strategy used to execute a search.
///
/// The enum is a plain tag without payload, so it is `Copy` and fully
/// comparable/hashable, matching the derives on [`Language`].
///
/// NOTE(review): the behaviour behind each mode is implemented outside this
/// file; the variant descriptions below are inferred from their names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SearchMode {
    /// Regular-expression matching; the mode chosen by `SearchOptions::default()`.
    Regex,
    /// Lexical (term-based) search — presumably a ranked text search.
    Lexical,
    /// Semantic search — presumably embedding based.
    Semantic,
    /// Hybrid search — presumably a combination of lexical and semantic.
    Hybrid,
}
157
158#[derive(Debug, Clone)]
159pub struct SearchOptions {
160    pub mode: SearchMode,
161    pub query: String,
162    pub path: PathBuf,
163    pub top_k: Option<usize>,
164    pub threshold: Option<f32>,
165    pub case_insensitive: bool,
166    pub whole_word: bool,
167    pub fixed_string: bool,
168    pub line_numbers: bool,
169    pub context_lines: usize,
170    pub before_context_lines: usize,
171    pub after_context_lines: usize,
172    pub recursive: bool,
173    pub json_output: bool,
174    pub reindex: bool,
175    pub show_scores: bool,
176    pub show_filenames: bool,
177    pub files_with_matches: bool,
178    pub files_without_matches: bool,
179    pub exclude_patterns: Vec<String>,
180    pub respect_gitignore: bool,
181    pub full_section: bool,
182}
183
184impl Default for SearchOptions {
185    fn default() -> Self {
186        Self {
187            mode: SearchMode::Regex,
188            query: String::new(),
189            path: PathBuf::from("."),
190            top_k: None,
191            threshold: None,
192            case_insensitive: false,
193            whole_word: false,
194            fixed_string: false,
195            line_numbers: false,
196            context_lines: 0,
197            before_context_lines: 0,
198            after_context_lines: 0,
199            recursive: true,
200            json_output: false,
201            reindex: false,
202            show_scores: false,
203            show_filenames: false,
204            files_with_matches: false,
205            files_without_matches: false,
206            exclude_patterns: get_default_exclude_patterns(),
207            respect_gitignore: true,
208            full_section: false,
209        }
210    }
211}
212
/// Returns the names that are excluded from search by default.
///
/// The list covers the tool's own index directory, model caches, version
/// control metadata, build output, Python virtual environments, editor
/// settings and temporary directories. A fresh `Vec` is allocated on every
/// call.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck's own index directory
        ".ck",
        // AI/ML model cache directories
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build directories
        "target",       // Rust
        "build",        // Various
        "dist",         // JavaScript/Python
        "node_modules", // JavaScript
        ".gradle",      // Java
        ".mvn",         // Maven
        "bin",          // Various
        "obj",          // .NET
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // IDE/Editor directories
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES.iter().copied().map(String::from).collect()
}
252
/// Computes the sidecar path for `file_path` inside `<repo_root>/.ck`.
///
/// The sidecar mirrors the file's location relative to `repo_root` and
/// appends `.ck` to the file name: `src/main.rs` becomes
/// `.ck/src/main.rs.ck`, and `README` becomes `.ck/README.ck`.
///
/// When `file_path` is not located under `repo_root`, its normal path
/// components are still mirrored below `.ck`; root, prefix, `.` and `..`
/// components are dropped. The result therefore always stays inside
/// `<repo_root>/.ck`.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);

    let mut sidecar = repo_root.join(".ck");
    // `PathBuf::push` with an absolute path replaces the whole buffer, which
    // would place the sidecar outside `.ck`. Appending only the normal
    // components keeps the result anchored below `.ck`.
    sidecar.extend(
        relative
            .components()
            .filter_map(|component| match component {
                std::path::Component::Normal(part) => Some(part),
                _ => None,
            }),
    );

    // Build the new extension as an `OsString` so that non-UTF-8 extensions
    // are preserved exactly instead of being lossily converted.
    let extension = match relative.extension() {
        Some(ext) => {
            let mut ext = ext.to_os_string();
            ext.push(".ck");
            ext
        }
        None => std::ffi::OsString::from("ck"),
    };
    sidecar.set_extension(extension);
    sidecar
}
264
265pub fn compute_file_hash(path: &Path) -> Result<String> {
266    let data = std::fs::read(path)?;
267    let hash = blake3::hash(&data);
268    Ok(hash.to_hex().to_string())
269}
270
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Every `Language` variant paired with the text `Display` must produce.
    const LANGUAGE_NAMES: [(Language, &str); 14] = [
        (Language::Rust, "rust"),
        (Language::Python, "python"),
        (Language::JavaScript, "javascript"),
        (Language::TypeScript, "typescript"),
        (Language::Haskell, "haskell"),
        (Language::Go, "go"),
        (Language::Java, "java"),
        (Language::C, "c"),
        (Language::Cpp, "cpp"),
        (Language::CSharp, "csharp"),
        (Language::Ruby, "ruby"),
        (Language::Php, "php"),
        (Language::Swift, "swift"),
        (Language::Kotlin, "kotlin"),
    ];

    /// Compares two spans field by field (`Span` has no `PartialEq`).
    fn assert_span_eq(actual: &Span, expected: &Span) {
        assert_eq!(actual.byte_start, expected.byte_start);
        assert_eq!(actual.byte_end, expected.byte_end);
        assert_eq!(actual.line_start, expected.line_start);
        assert_eq!(actual.line_end, expected.line_end);
    }

    #[test]
    fn test_span_creation() {
        let span = Span {
            byte_start: 0,
            byte_end: 10,
            line_start: 1,
            line_end: 2,
        };

        assert_eq!(span.byte_start, 0);
        assert_eq!(span.byte_end, 10);
        assert_eq!(span.line_start, 1);
        assert_eq!(span.line_end, 2);
    }

    #[test]
    fn test_error_display_and_conversion() {
        let err: CkError = std::io::Error::new(std::io::ErrorKind::NotFound, "gone").into();
        assert!(matches!(err, CkError::Io(_)));
        assert_eq!(err.to_string(), "IO error: gone");

        assert_eq!(CkError::Index("a".into()).to_string(), "Index error: a");
        assert_eq!(CkError::Search("b".into()).to_string(), "Search error: b");
        assert_eq!(
            CkError::Embedding("c".into()).to_string(),
            "Embedding error: c"
        );
        assert_eq!(CkError::Other("d".into()).to_string(), "Other error: d");
    }

    #[test]
    fn test_search_options_default() {
        let options = SearchOptions::default();

        assert!(matches!(options.mode, SearchMode::Regex));
        assert_eq!(options.query, "");
        assert_eq!(options.path, PathBuf::from("."));
        assert_eq!(options.top_k, None);
        assert_eq!(options.threshold, None);

        assert!(!options.case_insensitive);
        assert!(!options.whole_word);
        assert!(!options.fixed_string);
        assert!(!options.line_numbers);

        assert_eq!(options.context_lines, 0);
        assert_eq!(options.before_context_lines, 0);
        assert_eq!(options.after_context_lines, 0);

        assert!(options.recursive);
        assert!(!options.json_output);
        assert!(!options.reindex);
        assert!(!options.show_scores);
        assert!(!options.show_filenames);
        assert!(!options.files_with_matches);
        assert!(!options.files_without_matches);

        assert_eq!(options.exclude_patterns, get_default_exclude_patterns());
        assert!(options.respect_gitignore);
        assert!(!options.full_section);
    }

    #[test]
    fn test_default_exclude_patterns() {
        let patterns = get_default_exclude_patterns();

        assert_eq!(patterns.len(), 26);
        for expected in [".ck", ".git", "target", "node_modules", ".venv", ".tmp"] {
            assert!(
                patterns.iter().any(|p| p == expected),
                "missing default exclude pattern: {expected}"
            );
        }
    }

    #[test]
    fn test_file_metadata_serialization() {
        let metadata = FileMetadata {
            path: PathBuf::from("test.txt"),
            hash: "abc123".to_string(),
            last_modified: 1234567890,
            size: 1024,
        };

        let json = serde_json::to_string(&metadata).unwrap();
        let deserialized: FileMetadata = serde_json::from_str(&json).unwrap();

        assert_eq!(metadata.path, deserialized.path);
        assert_eq!(metadata.hash, deserialized.hash);
        assert_eq!(metadata.last_modified, deserialized.last_modified);
        assert_eq!(metadata.size, deserialized.size);
    }

    #[test]
    fn test_search_result_serialization() {
        let result = SearchResult {
            file: PathBuf::from("test.txt"),
            span: Span {
                byte_start: 0,
                byte_end: 10,
                line_start: 1,
                line_end: 1,
            },
            score: 0.95,
            preview: "hello world".to_string(),
            lang: Some(Language::Rust),
            symbol: Some("main".to_string()),
        };

        let json = serde_json::to_string(&result).unwrap();
        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_span_eq(&deserialized.span, &result.span);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.preview, deserialized.preview);
        assert_eq!(result.lang, deserialized.lang);
        assert_eq!(result.symbol, deserialized.symbol);
    }

    #[test]
    fn test_search_result_omits_absent_optionals() {
        let result = SearchResult {
            file: PathBuf::from("notes.txt"),
            span: Span {
                byte_start: 3,
                byte_end: 8,
                line_start: 2,
                line_end: 2,
            },
            score: 0.5,
            preview: "hello".to_string(),
            lang: None,
            symbol: None,
        };

        let json = serde_json::to_string(&result).unwrap();
        // `skip_serializing_if` drops the keys instead of writing `null`.
        assert!(!json.contains("\"lang\""));
        assert!(!json.contains("\"symbol\""));

        let deserialized: SearchResult = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.lang, None);
        assert_eq!(deserialized.symbol, None);
        assert_span_eq(&deserialized.span, &result.span);
    }

    #[test]
    fn test_get_sidecar_path() {
        let repo_root = PathBuf::from("/home/user/project");
        let file_path = PathBuf::from("/home/user/project/src/main.rs");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/home/user/project/.ck/src/main.rs.ck");

        assert_eq!(sidecar, expected);
    }

    #[test]
    fn test_get_sidecar_path_no_extension() {
        let repo_root = PathBuf::from("/project");
        let file_path = PathBuf::from("/project/README");

        let sidecar = get_sidecar_path(&repo_root, &file_path);
        let expected = PathBuf::from("/project/.ck/README.ck");

        assert_eq!(sidecar, expected);
    }

    #[test]
    fn test_get_sidecar_path_special_names() {
        let repo_root = PathBuf::from("/project");

        let cases = [
            ("/project/archive.tar.gz", "/project/.ck/archive.tar.gz.ck"),
            ("/project/.gitignore", "/project/.ck/.gitignore.ck"),
            ("src/lib.rs", "/project/.ck/src/lib.rs.ck"),
        ];

        for (input, expected) in cases {
            assert_eq!(
                get_sidecar_path(&repo_root, Path::new(input)),
                PathBuf::from(expected),
                "sidecar for {input}"
            );
        }
    }

    #[test]
    fn test_compute_file_hash() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");

        fs::write(&file_path, "hello world").unwrap();

        let hash1 = compute_file_hash(&file_path).unwrap();
        let hash2 = compute_file_hash(&file_path).unwrap();

        // Same content should produce same hash
        assert_eq!(hash1, hash2);
        assert!(!hash1.is_empty());

        // A BLAKE3 digest is 32 bytes, i.e. 64 hex characters.
        assert_eq!(hash1.len(), 64);
        assert!(hash1.chars().all(|c| c.is_ascii_hexdigit()));

        // Different content should produce different hash
        fs::write(&file_path, "hello rust").unwrap();
        let hash3 = compute_file_hash(&file_path).unwrap();
        assert_ne!(hash1, hash3);
    }

    #[test]
    fn test_compute_file_hash_nonexistent() {
        // Resolve the missing file inside a fresh temp dir so the test does
        // not depend on the contents of the current working directory.
        let temp_dir = TempDir::new().unwrap();
        let missing = temp_dir.path().join("nonexistent.txt");

        let result = compute_file_hash(&missing);
        assert!(matches!(result, Err(CkError::Io(_))));
    }

    #[test]
    fn test_json_search_result_serialization() {
        let signals = SearchSignals {
            lex_rank: Some(1),
            vec_rank: Some(2),
            rrf_score: 0.85,
        };

        let result = JsonSearchResult {
            file: "test.txt".to_string(),
            span: Span {
                byte_start: 0,
                byte_end: 5,
                line_start: 1,
                line_end: 1,
            },
            lang: None, // txt is not a supported language
            symbol: None,
            score: 0.95,
            signals,
            preview: "hello".to_string(),
            model: "bge-small".to_string(),
        };

        let json = serde_json::to_string(&result).unwrap();
        // Unlike `SearchResult`, absent optionals are written as `null`.
        assert!(json.contains("\"lang\":null"));
        assert!(json.contains("\"symbol\":null"));

        let deserialized: JsonSearchResult = serde_json::from_str(&json).unwrap();

        assert_eq!(result.file, deserialized.file);
        assert_span_eq(&deserialized.span, &result.span);
        assert_eq!(result.lang, deserialized.lang);
        assert_eq!(result.symbol, deserialized.symbol);
        assert_eq!(result.score, deserialized.score);
        assert_eq!(result.signals.lex_rank, deserialized.signals.lex_rank);
        assert_eq!(result.signals.vec_rank, deserialized.signals.vec_rank);
        assert_eq!(result.signals.rrf_score, deserialized.signals.rrf_score);
        assert_eq!(result.preview, deserialized.preview);
        assert_eq!(result.model, deserialized.model);
    }

    #[test]
    fn test_language_from_extension() {
        let cases = [
            ("rs", Language::Rust),
            ("py", Language::Python),
            ("js", Language::JavaScript),
            ("ts", Language::TypeScript),
            ("tsx", Language::TypeScript),
            ("hs", Language::Haskell),
            ("lhs", Language::Haskell),
            ("go", Language::Go),
            ("java", Language::Java),
            ("c", Language::C),
            ("cpp", Language::Cpp),
            ("cc", Language::Cpp),
            ("cxx", Language::Cpp),
            ("c++", Language::Cpp),
            ("h", Language::Cpp),
            ("hpp", Language::Cpp),
            ("cs", Language::CSharp),
            ("rb", Language::Ruby),
            ("php", Language::Php),
            ("swift", Language::Swift),
            ("kt", Language::Kotlin),
            ("kts", Language::Kotlin),
        ];

        for (ext, expected) in cases {
            assert_eq!(
                Language::from_extension(ext),
                Some(expected),
                "extension {ext}"
            );
        }

        assert_eq!(Language::from_extension("unknown"), None);
        assert_eq!(Language::from_extension(""), None);
    }

    #[test]
    fn test_language_from_path() {
        let cases = [
            ("test.rs", Some(Language::Rust)),
            ("test.py", Some(Language::Python)),
            ("test.js", Some(Language::JavaScript)),
            ("test.ts", Some(Language::TypeScript)),
            ("test.hs", Some(Language::Haskell)),
            ("test.lhs", Some(Language::Haskell)),
            ("test.go", Some(Language::Go)),
            ("src/nested/dir/mod.rs", Some(Language::Rust)),
            ("archive.tar.rb", Some(Language::Ruby)),
            ("test.unknown", None), // unknown extensions return None
            ("noext", None),        // no extension
            (".rs", None),          // a dotfile has no extension
        ];

        for (path, expected) in cases {
            assert_eq!(Language::from_path(Path::new(path)), expected, "path {path}");
        }
    }

    #[test]
    fn test_language_display() {
        for (language, name) in LANGUAGE_NAMES {
            assert_eq!(language.to_string(), name);
        }
    }
}