ck_core/
lib.rs

1use serde::{Deserialize, Serialize};
2use std::path::{Path, PathBuf};
3use thiserror::Error;
4
5#[derive(Debug, Error)]
6pub enum CkError {
7    #[error("IO error: {0}")]
8    Io(#[from] std::io::Error),
9
10    #[error("Regex error: {0}")]
11    Regex(#[from] regex::Error),
12
13    #[error("Serialization error: {0}")]
14    Serialization(#[from] bincode::Error),
15
16    #[error("JSON error: {0}")]
17    Json(#[from] serde_json::Error),
18
19    #[error("Index error: {0}")]
20    Index(String),
21
22    #[error("Search error: {0}")]
23    Search(String),
24
25    #[error("Embedding error: {0}")]
26    Embedding(String),
27
28    #[error("Other error: {0}")]
29    Other(String),
30}
31
32pub type Result<T> = std::result::Result<T, CkError>;
33
34#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
35pub enum Language {
36    Rust,
37    Python,
38    JavaScript,
39    TypeScript,
40    Haskell,
41    Go,
42    Java,
43    C,
44    Cpp,
45    CSharp,
46    Ruby,
47    Php,
48    Swift,
49    Kotlin,
50}
51
52impl Language {
53    pub fn from_extension(ext: &str) -> Option<Self> {
54        match ext {
55            "rs" => Some(Language::Rust),
56            "py" => Some(Language::Python),
57            "js" => Some(Language::JavaScript),
58            "ts" | "tsx" => Some(Language::TypeScript),
59            "hs" | "lhs" => Some(Language::Haskell),
60            "go" => Some(Language::Go),
61            "java" => Some(Language::Java),
62            "c" => Some(Language::C),
63            "cpp" | "cc" | "cxx" | "c++" => Some(Language::Cpp),
64            "h" | "hpp" => Some(Language::Cpp), // Assume C++ for headers
65            "cs" => Some(Language::CSharp),
66            "rb" => Some(Language::Ruby),
67            "php" => Some(Language::Php),
68            "swift" => Some(Language::Swift),
69            "kt" | "kts" => Some(Language::Kotlin),
70            _ => None,
71        }
72    }
73
74    pub fn from_path(path: &Path) -> Option<Self> {
75        path.extension()
76            .and_then(|ext| ext.to_str())
77            .and_then(Self::from_extension)
78    }
79}
80
81impl std::fmt::Display for Language {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let name = match self {
84            Language::Rust => "rust",
85            Language::Python => "python",
86            Language::JavaScript => "javascript",
87            Language::TypeScript => "typescript",
88            Language::Haskell => "haskell",
89            Language::Go => "go",
90            Language::Java => "java",
91            Language::C => "c",
92            Language::Cpp => "cpp",
93            Language::CSharp => "csharp",
94            Language::Ruby => "ruby",
95            Language::Php => "php",
96            Language::Swift => "swift",
97            Language::Kotlin => "kotlin",
98        };
99        write!(f, "{}", name)
100    }
101}
102
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct Span {
105    pub byte_start: usize,
106    pub byte_end: usize,
107    pub line_start: usize,
108    pub line_end: usize,
109}
110
111#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct FileMetadata {
113    pub path: PathBuf,
114    pub hash: String,
115    pub last_modified: u64,
116    pub size: u64,
117}
118
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct SearchResult {
121    pub file: PathBuf,
122    pub span: Span,
123    pub score: f32,
124    pub preview: String,
125    #[serde(skip_serializing_if = "Option::is_none")]
126    pub lang: Option<Language>,
127    #[serde(skip_serializing_if = "Option::is_none")]
128    pub symbol: Option<String>,
129    #[serde(skip_serializing_if = "Option::is_none")]
130    pub chunk_hash: Option<String>,
131    #[serde(skip_serializing_if = "Option::is_none")]
132    pub index_epoch: Option<u64>,
133}
134
135/// Enhanced search results that include near-miss information for threshold queries
136#[derive(Debug, Clone)]
137pub struct SearchResults {
138    pub matches: Vec<SearchResult>,
139    /// The highest scoring result below the threshold (if any)
140    pub closest_below_threshold: Option<SearchResult>,
141}
142
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct JsonSearchResult {
145    pub file: String,
146    pub span: Span,
147    pub lang: Option<Language>,
148    pub symbol: Option<String>,
149    pub score: f32,
150    pub signals: SearchSignals,
151    pub preview: String,
152    pub model: String,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct JsonlSearchResult {
157    pub path: String,
158    pub span: Span,
159    pub language: Option<String>,
160    #[serde(skip_serializing_if = "Option::is_none")]
161    pub snippet: Option<String>,
162    #[serde(skip_serializing_if = "Option::is_none")]
163    pub score: Option<f32>,
164    #[serde(skip_serializing_if = "Option::is_none")]
165    pub chunk_hash: Option<String>,
166    #[serde(skip_serializing_if = "Option::is_none")]
167    pub index_epoch: Option<u64>,
168}
169
170#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct SearchSignals {
172    pub lex_rank: Option<usize>,
173    pub vec_rank: Option<usize>,
174    pub rrf_score: f32,
175}
176
/// Strategy used to run a search.
///
/// The variants are plain tags; how each mode is executed is decided by the
/// search code outside this file. `Eq` and `Hash` are derived in addition to
/// `PartialEq` so a mode can be used as a map or set key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SearchMode {
    /// Regular-expression search; the mode chosen by `SearchOptions::default()`.
    Regex,
    /// Lexical search.
    Lexical,
    /// Semantic search.
    Semantic,
    /// Hybrid search (presumably lexical and semantic combined — confirm).
    Hybrid,
}
184
185#[derive(Debug, Clone)]
186pub struct SearchOptions {
187    pub mode: SearchMode,
188    pub query: String,
189    pub path: PathBuf,
190    pub top_k: Option<usize>,
191    pub threshold: Option<f32>,
192    pub case_insensitive: bool,
193    pub whole_word: bool,
194    pub fixed_string: bool,
195    pub line_numbers: bool,
196    pub context_lines: usize,
197    pub before_context_lines: usize,
198    pub after_context_lines: usize,
199    pub recursive: bool,
200    pub json_output: bool,
201    pub jsonl_output: bool,
202    pub no_snippet: bool,
203    pub reindex: bool,
204    pub show_scores: bool,
205    pub show_filenames: bool,
206    pub files_with_matches: bool,
207    pub files_without_matches: bool,
208    pub exclude_patterns: Vec<String>,
209    pub respect_gitignore: bool,
210    pub full_section: bool,
211    // Enhanced embedding options (search-time only)
212    pub rerank: bool,
213    pub rerank_model: Option<String>,
214}
215
216impl JsonlSearchResult {
217    pub fn from_search_result(result: &SearchResult, include_snippet: bool) -> Self {
218        Self {
219            path: result.file.to_string_lossy().to_string(),
220            span: result.span.clone(),
221            language: result.lang.as_ref().map(|l| l.to_string()),
222            snippet: if include_snippet {
223                Some(result.preview.clone())
224            } else {
225                None
226            },
227            score: if result.score >= 0.0 {
228                Some(result.score)
229            } else {
230                None
231            },
232            chunk_hash: result.chunk_hash.clone(),
233            index_epoch: result.index_epoch,
234        }
235    }
236}
237
238impl Default for SearchOptions {
239    fn default() -> Self {
240        Self {
241            mode: SearchMode::Regex,
242            query: String::new(),
243            path: PathBuf::from("."),
244            top_k: None,
245            threshold: None,
246            case_insensitive: false,
247            whole_word: false,
248            fixed_string: false,
249            line_numbers: false,
250            context_lines: 0,
251            before_context_lines: 0,
252            after_context_lines: 0,
253            recursive: true,
254            json_output: false,
255            jsonl_output: false,
256            no_snippet: false,
257            reindex: false,
258            show_scores: false,
259            show_filenames: false,
260            files_with_matches: false,
261            files_without_matches: false,
262            exclude_patterns: get_default_exclude_patterns(),
263            respect_gitignore: true,
264            full_section: false,
265            // Enhanced embedding options (search-time only)
266            rerank: false,
267            rerank_model: None,
268        }
269    }
270}
271
/// Returns the directory names that are excluded from searches by default.
///
/// The list covers ck's own index directory, model and bytecode caches,
/// version-control metadata, build output, Python virtual environments,
/// IDE/editor settings and temporary directories. A fresh `Vec` is built on
/// every call, always in the same order.
pub fn get_default_exclude_patterns() -> Vec<String> {
    const DEFAULT_EXCLUDES: &[&str] = &[
        // ck's own index directory
        ".ck",
        // AI/ML model cache directories
        ".fastembed_cache",
        ".cache",
        "__pycache__",
        // Version control
        ".git",
        ".svn",
        ".hg",
        // Build directories
        "target",       // Rust
        "build",        // Various
        "dist",         // JavaScript/Python
        "node_modules", // JavaScript
        ".gradle",      // Java
        ".mvn",         // Maven
        "bin",          // Various
        "obj",          // .NET
        // Python virtual environments
        "venv",
        ".venv",
        "env",
        ".env",
        "virtualenv",
        // IDE/Editor directories
        ".vscode",
        ".idea",
        ".eclipse",
        // Temporary directories
        "tmp",
        "temp",
        ".tmp",
    ];

    DEFAULT_EXCLUDES
        .iter()
        .map(|name| (*name).to_owned())
        .collect()
}
311
/// Computes where the sidecar file for `file_path` lives inside the index.
///
/// The sidecar mirrors the file's location relative to `repo_root` beneath
/// `repo_root/.ck` and has `.ck` appended to the file name, so
/// `src/main.rs` becomes `.ck/src/main.rs.ck` and `README` becomes
/// `.ck/README.ck`.
///
/// When `file_path` does not start with `repo_root` it is used as given. If
/// it is additionally rooted (or carries a Windows drive/UNC prefix), those
/// leading components are dropped so that the result still lands under
/// `repo_root/.ck` rather than beside the source file. `..` components are
/// left untouched.
///
/// The extension is handled as an `OsString`, so extensions that are not
/// valid UTF-8 are carried over byte-for-byte.
pub fn get_sidecar_path(repo_root: &Path, file_path: &Path) -> PathBuf {
    /// True for the components that make a path absolute or drive-relative.
    fn is_anchor(part: &std::path::Component<'_>) -> bool {
        matches!(
            part,
            std::path::Component::Prefix(_) | std::path::Component::RootDir
        )
    }

    let relative = file_path.strip_prefix(repo_root).unwrap_or(file_path);
    let mut sidecar = repo_root.join(".ck");

    if relative.components().any(|part| is_anchor(&part)) {
        // `PathBuf::push` would replace the whole buffer with a rooted or
        // prefixed path, moving the sidecar out of `.ck`; append only the
        // remaining components instead.
        sidecar.extend(relative.components().filter(|part| !is_anchor(part)));
    } else {
        sidecar.push(relative);
    }

    // Keep the original extension and add `ck` after it: `rs` -> `rs.ck`.
    let ext = match relative.extension() {
        Some(existing) => {
            let mut combined = existing.to_os_string();
            combined.push(".ck");
            combined
        }
        None => std::ffi::OsString::from("ck"),
    };
    sidecar.set_extension(ext);
    sidecar
}
323
324pub fn compute_file_hash(path: &Path) -> Result<String> {
325    let data = std::fs::read(path)?;
326    let hash = blake3::hash(&data);
327    Ok(hash.to_hex().to_string())
328}
329
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A `Span` exposes exactly the values it was built with.
    #[test]
    fn test_span_creation() {
        let Span {
            byte_start,
            byte_end,
            line_start,
            line_end,
        } = Span {
            byte_start: 0,
            byte_end: 10,
            line_start: 1,
            line_end: 2,
        };

        assert_eq!(byte_start, 0);
        assert_eq!(byte_end, 10);
        assert_eq!(line_start, 1);
        assert_eq!(line_end, 2);
    }

    /// The default options describe a plain recursive regex search of `.`.
    #[test]
    fn test_search_options_default() {
        let defaults = SearchOptions::default();

        assert!(matches!(defaults.mode, SearchMode::Regex));
        assert_eq!(defaults.query, "");
        assert_eq!(defaults.path, PathBuf::from("."));
        assert_eq!(defaults.top_k, None);
        assert_eq!(defaults.threshold, None);
        assert_eq!(defaults.context_lines, 0);
        assert!(defaults.recursive);

        let disabled_flags = [
            ("case_insensitive", defaults.case_insensitive),
            ("whole_word", defaults.whole_word),
            ("fixed_string", defaults.fixed_string),
            ("line_numbers", defaults.line_numbers),
            ("json_output", defaults.json_output),
            ("reindex", defaults.reindex),
            ("show_scores", defaults.show_scores),
            ("show_filenames", defaults.show_filenames),
        ];
        for (flag, enabled) in disabled_flags.iter() {
            assert!(!*enabled, "{} should default to false", flag);
        }
    }

    /// `FileMetadata` survives a JSON round trip unchanged.
    #[test]
    fn test_file_metadata_serialization() {
        let original = FileMetadata {
            path: PathBuf::from("test.txt"),
            hash: "abc123".to_string(),
            last_modified: 1234567890,
            size: 1024,
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: FileMetadata = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.path, decoded.path);
        assert_eq!(original.hash, decoded.hash);
        assert_eq!(original.last_modified, decoded.last_modified);
        assert_eq!(original.size, decoded.size);
    }

    /// `SearchResult` survives a JSON round trip unchanged.
    #[test]
    fn test_search_result_serialization() {
        let original = SearchResult {
            file: PathBuf::from("test.txt"),
            span: Span {
                byte_start: 0,
                byte_end: 10,
                line_start: 1,
                line_end: 1,
            },
            score: 0.95,
            preview: "hello world".to_string(),
            lang: Some(Language::Rust),
            symbol: Some("main".to_string()),
            chunk_hash: Some("abc123".to_string()),
            index_epoch: Some(1699123456),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: SearchResult = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.file, decoded.file);
        assert_eq!(original.score, decoded.score);
        assert_eq!(original.preview, decoded.preview);
        assert_eq!(original.lang, decoded.lang);
        assert_eq!(original.symbol, decoded.symbol);
        assert_eq!(original.chunk_hash, decoded.chunk_hash);
        assert_eq!(original.index_epoch, decoded.index_epoch);
    }

    /// Conversion to the JSONL shape copies every field and honors the
    /// snippet switch.
    #[test]
    fn test_jsonl_search_result_conversion() {
        let preview = "function authenticate(user) {...}";
        let source = SearchResult {
            file: PathBuf::from("src/auth.rs"),
            span: Span {
                byte_start: 1203,
                byte_end: 1456,
                line_start: 42,
                line_end: 58,
            },
            score: 0.89,
            preview: preview.to_string(),
            lang: Some(Language::Rust),
            symbol: Some("authenticate".to_string()),
            chunk_hash: Some("abc123def456".to_string()),
            index_epoch: Some(1699123456),
        };

        // Test with snippet
        let with_snippet = JsonlSearchResult::from_search_result(&source, true);
        assert_eq!(with_snippet.path, "src/auth.rs");
        assert_eq!(with_snippet.span.line_start, 42);
        assert_eq!(with_snippet.language, Some("rust".to_string()));
        assert_eq!(with_snippet.snippet, Some(preview.to_string()));
        assert_eq!(with_snippet.score, Some(0.89));
        assert_eq!(with_snippet.chunk_hash, Some("abc123def456".to_string()));
        assert_eq!(with_snippet.index_epoch, Some(1699123456));

        // Test without snippet
        let without_snippet = JsonlSearchResult::from_search_result(&source, false);
        assert_eq!(without_snippet.snippet, None);
        assert_eq!(without_snippet.path, "src/auth.rs");
    }

    /// A file inside the repository maps to the mirrored path under `.ck`.
    #[test]
    fn test_get_sidecar_path() {
        let actual = get_sidecar_path(
            Path::new("/home/user/project"),
            Path::new("/home/user/project/src/main.rs"),
        );

        assert_eq!(
            actual,
            PathBuf::from("/home/user/project/.ck/src/main.rs.ck")
        );
    }

    /// A file without an extension simply gains `.ck`.
    #[test]
    fn test_get_sidecar_path_no_extension() {
        let actual = get_sidecar_path(Path::new("/project"), Path::new("/project/README"));

        assert_eq!(actual, PathBuf::from("/project/.ck/README.ck"));
    }

    /// Hashing is stable for identical content and changes with the content.
    #[test]
    fn test_compute_file_hash() {
        let scratch = TempDir::new().unwrap();
        let target = scratch.path().join("test.txt");

        fs::write(&target, "hello world").unwrap();
        let first = compute_file_hash(&target).unwrap();
        let second = compute_file_hash(&target).unwrap();

        // Same content should produce same hash
        assert_eq!(first, second);
        assert!(!first.is_empty());

        // Different content should produce different hash
        fs::write(&target, "hello rust").unwrap();
        let third = compute_file_hash(&target).unwrap();
        assert_ne!(first, third);
    }

    /// Hashing a missing file reports an error instead of panicking.
    #[test]
    fn test_compute_file_hash_nonexistent() {
        let outcome = compute_file_hash(Path::new("nonexistent.txt"));
        assert!(outcome.is_err());
    }

    /// `JsonSearchResult` survives a JSON round trip unchanged.
    #[test]
    fn test_json_search_result_serialization() {
        let original = JsonSearchResult {
            file: "test.txt".to_string(),
            span: Span {
                byte_start: 0,
                byte_end: 5,
                line_start: 1,
                line_end: 1,
            },
            lang: None, // txt is not a supported language
            symbol: None,
            score: 0.95,
            signals: SearchSignals {
                lex_rank: Some(1),
                vec_rank: Some(2),
                rrf_score: 0.85,
            },
            preview: "hello".to_string(),
            model: "bge-small".to_string(),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: JsonSearchResult = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.file, decoded.file);
        assert_eq!(original.score, decoded.score);
        assert_eq!(original.signals.rrf_score, decoded.signals.rrf_score);
        assert_eq!(original.model, decoded.model);
    }

    /// Every supported extension maps to its language; anything else is `None`.
    #[test]
    fn test_language_from_extension() {
        let cases = [
            ("rs", Some(Language::Rust)),
            ("py", Some(Language::Python)),
            ("js", Some(Language::JavaScript)),
            ("ts", Some(Language::TypeScript)),
            ("tsx", Some(Language::TypeScript)),
            ("hs", Some(Language::Haskell)),
            ("lhs", Some(Language::Haskell)),
            ("go", Some(Language::Go)),
            ("java", Some(Language::Java)),
            ("c", Some(Language::C)),
            ("cpp", Some(Language::Cpp)),
            ("cs", Some(Language::CSharp)),
            ("rb", Some(Language::Ruby)),
            ("php", Some(Language::Php)),
            ("swift", Some(Language::Swift)),
            ("kt", Some(Language::Kotlin)),
            ("kts", Some(Language::Kotlin)),
            ("unknown", None),
        ];

        for (ext, expected) in cases.iter() {
            assert_eq!(
                Language::from_extension(ext),
                *expected,
                "extension {:?}",
                ext
            );
        }
    }

    /// The language of a path is derived from its extension.
    #[test]
    fn test_language_from_path() {
        let cases = [
            ("test.rs", Some(Language::Rust)),
            ("test.py", Some(Language::Python)),
            ("test.js", Some(Language::JavaScript)),
            ("test.hs", Some(Language::Haskell)),
            ("test.lhs", Some(Language::Haskell)),
            ("test.go", Some(Language::Go)),
            ("test.unknown", None), // unknown extensions return None
            ("noext", None),        // no extension
        ];

        for (name, expected) in cases.iter() {
            assert_eq!(
                Language::from_path(Path::new(name)),
                *expected,
                "path {:?}",
                name
            );
        }
    }

    /// Languages display as their lowercase names.
    #[test]
    fn test_language_display() {
        let cases = [
            (Language::Rust, "rust"),
            (Language::Python, "python"),
            (Language::JavaScript, "javascript"),
            (Language::TypeScript, "typescript"),
            (Language::Go, "go"),
            (Language::Java, "java"),
        ];

        for (language, expected) in cases.iter() {
            assert_eq!(language.to_string(), *expected);
        }
    }
}